diff --git a/lib/java-speech-api-master.jar b/lib/java-speech-api-master.jar
new file mode 100755
index 00000000..46a7c5a4
Binary files /dev/null and b/lib/java-speech-api-master.jar differ
diff --git a/lib/java-speech-api-master/.classpath b/lib/java-speech-api-master/.classpath
deleted file mode 100755
index fb501163..00000000
--- a/lib/java-speech-api-master/.classpath
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-
-
-
-
diff --git a/lib/java-speech-api-master/.gitignore b/lib/java-speech-api-master/.gitignore
deleted file mode 100755
index a56cb9b5..00000000
--- a/lib/java-speech-api-master/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-/bin
-.classpath
diff --git a/lib/java-speech-api-master/.project b/lib/java-speech-api-master/.project
deleted file mode 100755
index ee24677f..00000000
--- a/lib/java-speech-api-master/.project
+++ /dev/null
@@ -1,16 +0,0 @@
-
-
- java-speech-api-git
-
-
-
-
-
- org.eclipse.jdt.core.javabuilder
-
-
-
-
- org.eclipse.jdt.core.javanature
-
-
diff --git a/lib/java-speech-api-master/CHANGELOG.markdown b/lib/java-speech-api-master/CHANGELOG.markdown
deleted file mode 100755
index a331ebc8..00000000
--- a/lib/java-speech-api-master/CHANGELOG.markdown
+++ /dev/null
@@ -1,37 +0,0 @@
-#Java-Speech-API Changelog
-
-##Changelog
-Each changelog entry corresponds to a tagged and signed Git commit that marks the changes.
-A tagged commit may or may not have a corresponding binary version available.
-Format: Tag: ``
-
-* Version 1.15
- * Optimized synthesiser class. Massive speed improvements on long input strings!
- * Added experimental Duplex API in preparation for version 1.2.
-
-* Version 1.11 (Tag V1.100)
- * Fixed major bug in Recognizer
-
-* Version 1.10 (Tag v1.100)
- * Added new Microphone Analyzer class.
- * Added volume and frequency detection and a framework for Voice Activity Detection
- * Microphone API updated to make it more usable.
- * API re-branded as J.A.R.V.I.S. (Just A Reliable Vocal Interpreter & Synthesiser)
-
-* Version 1.06 (Tag v1.016)
- * Added support for synthesiser for strings longer than 100 characters (Credits to @Skylion007)
- * Added support for synthesiser for multiple languages, accents, and voices. (Credits to @Skylion007)
- * Added support for auto-detection of language within synthesiser. (Credits to @Skylion007)
-
-* Version 1.05 (Tag: v1.015)
- * Improved language support for recognizer (Credits to @duncanj)
- * Add support for multiple responses for recognizer (Credits to @duncanj)
- * Add profanity filter toggle support for recognizer (Credits to @duncanj)
-
-* Version 1.01 (Tag: v1.01)
- * Fixed state functions for Microphones
- * Fixed encoding single byte frames
- * Support Multiple Languages
-
-* Version 1.00 (Tag: v1.00)
- * Initial Release
diff --git a/lib/java-speech-api-master/CREDITS.markdown b/lib/java-speech-api-master/CREDITS.markdown
deleted file mode 100755
index 17c20a7c..00000000
--- a/lib/java-speech-api-master/CREDITS.markdown
+++ /dev/null
@@ -1,23 +0,0 @@
-#J.A.R.V.I.S. Speech API (Java-Speech API) Credits
-
-##Credits
-The following people/organizations have helped provide functionality for the API:
-
-* JavaFlacEncoder Project
- * Provided functionality to convert Wave files to FLAC format
- * This allowed for the FLAC audio to be sent to Google to be "recognized"
- * Created by Preston Lacey
- * Homepage: http://sourceforge.net/projects/javaflacencoder/
-* Google
- * Provided functionality for two main API functions
- * Recognizer
- * Allows for speech audio to be recognized to text
- * Synthesiser
- * Allows for text to speech translation
- * Homepage: http://google.com
-* Princeton University
- * The implemented FFT algorithm is derived from one on the university's website.
- * Homepage: http://www.princeton.edu
-
-We would like to thank all of the above for their work; this wrapper/API could not have been
-created without it.
\ No newline at end of file
diff --git a/lib/java-speech-api-master/LICENSE b/lib/java-speech-api-master/LICENSE
deleted file mode 100755
index ef7e7efc..00000000
--- a/lib/java-speech-api-master/LICENSE
+++ /dev/null
@@ -1,674 +0,0 @@
-GNU GENERAL PUBLIC LICENSE
- Version 3, 29 June 2007
-
- Copyright (C) 2007 Free Software Foundation, Inc.
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
- Preamble
-
- The GNU General Public License is a free, copyleft license for
-software and other kinds of works.
-
- The licenses for most software and other practical works are designed
-to take away your freedom to share and change the works. By contrast,
-the GNU General Public License is intended to guarantee your freedom to
-share and change all versions of a program--to make sure it remains free
-software for all its users. We, the Free Software Foundation, use the
-GNU General Public License for most of our software; it applies also to
-any other work released this way by its authors. You can apply it to
-your programs, too.
-
- When we speak of free software, we are referring to freedom, not
-price. Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-them if you wish), that you receive source code or can get it if you
-want it, that you can change the software or use pieces of it in new
-free programs, and that you know you can do these things.
-
- To protect your rights, we need to prevent others from denying you
-these rights or asking you to surrender the rights. Therefore, you have
-certain responsibilities if you distribute copies of the software, or if
-you modify it: responsibilities to respect the freedom of others.
-
- For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must pass on to the recipients the same
-freedoms that you received. You must make sure that they, too, receive
-or can get the source code. And you must show them these terms so they
-know their rights.
-
- Developers that use the GNU GPL protect your rights with two steps:
-(1) assert copyright on the software, and (2) offer you this License
-giving you legal permission to copy, distribute and/or modify it.
-
- For the developers' and authors' protection, the GPL clearly explains
-that there is no warranty for this free software. For both users' and
-authors' sake, the GPL requires that modified versions be marked as
-changed, so that their problems will not be attributed erroneously to
-authors of previous versions.
-
- Some devices are designed to deny users access to install or run
-modified versions of the software inside them, although the manufacturer
-can do so. This is fundamentally incompatible with the aim of
-protecting users' freedom to change the software. The systematic
-pattern of such abuse occurs in the area of products for individuals to
-use, which is precisely where it is most unacceptable. Therefore, we
-have designed this version of the GPL to prohibit the practice for those
-products. If such problems arise substantially in other domains, we
-stand ready to extend this provision to those domains in future versions
-of the GPL, as needed to protect the freedom of users.
-
- Finally, every program is threatened constantly by software patents.
-States should not allow patents to restrict development and use of
-software on general-purpose computers, but in those that do, we wish to
-avoid the special danger that patents applied to a free program could
-make it effectively proprietary. To prevent this, the GPL assures that
-patents cannot be used to render the program non-free.
-
- The precise terms and conditions for copying, distribution and
-modification follow.
-
- TERMS AND CONDITIONS
-
- 0. Definitions.
-
- "This License" refers to version 3 of the GNU General Public License.
-
- "Copyright" also means copyright-like laws that apply to other kinds of
-works, such as semiconductor masks.
-
- "The Program" refers to any copyrightable work licensed under this
-License. Each licensee is addressed as "you". "Licensees" and
-"recipients" may be individuals or organizations.
-
- To "modify" a work means to copy from or adapt all or part of the work
-in a fashion requiring copyright permission, other than the making of an
-exact copy. The resulting work is called a "modified version" of the
-earlier work or a work "based on" the earlier work.
-
- A "covered work" means either the unmodified Program or a work based
-on the Program.
-
- To "propagate" a work means to do anything with it that, without
-permission, would make you directly or secondarily liable for
-infringement under applicable copyright law, except executing it on a
-computer or modifying a private copy. Propagation includes copying,
-distribution (with or without modification), making available to the
-public, and in some countries other activities as well.
-
- To "convey" a work means any kind of propagation that enables other
-parties to make or receive copies. Mere interaction with a user through
-a computer network, with no transfer of a copy, is not conveying.
-
- An interactive user interface displays "Appropriate Legal Notices"
-to the extent that it includes a convenient and prominently visible
-feature that (1) displays an appropriate copyright notice, and (2)
-tells the user that there is no warranty for the work (except to the
-extent that warranties are provided), that licensees may convey the
-work under this License, and how to view a copy of this License. If
-the interface presents a list of user commands or options, such as a
-menu, a prominent item in the list meets this criterion.
-
- 1. Source Code.
-
- The "source code" for a work means the preferred form of the work
-for making modifications to it. "Object code" means any non-source
-form of a work.
-
- A "Standard Interface" means an interface that either is an official
-standard defined by a recognized standards body, or, in the case of
-interfaces specified for a particular programming language, one that
-is widely used among developers working in that language.
-
- The "System Libraries" of an executable work include anything, other
-than the work as a whole, that (a) is included in the normal form of
-packaging a Major Component, but which is not part of that Major
-Component, and (b) serves only to enable use of the work with that
-Major Component, or to implement a Standard Interface for which an
-implementation is available to the public in source code form. A
-"Major Component", in this context, means a major essential component
-(kernel, window system, and so on) of the specific operating system
-(if any) on which the executable work runs, or a compiler used to
-produce the work, or an object code interpreter used to run it.
-
- The "Corresponding Source" for a work in object code form means all
-the source code needed to generate, install, and (for an executable
-work) run the object code and to modify the work, including scripts to
-control those activities. However, it does not include the work's
-System Libraries, or general-purpose tools or generally available free
-programs which are used unmodified in performing those activities but
-which are not part of the work. For example, Corresponding Source
-includes interface definition files associated with source files for
-the work, and the source code for shared libraries and dynamically
-linked subprograms that the work is specifically designed to require,
-such as by intimate data communication or control flow between those
-subprograms and other parts of the work.
-
- The Corresponding Source need not include anything that users
-can regenerate automatically from other parts of the Corresponding
-Source.
-
- The Corresponding Source for a work in source code form is that
-same work.
-
- 2. Basic Permissions.
-
- All rights granted under this License are granted for the term of
-copyright on the Program, and are irrevocable provided the stated
-conditions are met. This License explicitly affirms your unlimited
-permission to run the unmodified Program. The output from running a
-covered work is covered by this License only if the output, given its
-content, constitutes a covered work. This License acknowledges your
-rights of fair use or other equivalent, as provided by copyright law.
-
- You may make, run and propagate covered works that you do not
-convey, without conditions so long as your license otherwise remains
-in force. You may convey covered works to others for the sole purpose
-of having them make modifications exclusively for you, or provide you
-with facilities for running those works, provided that you comply with
-the terms of this License in conveying all material for which you do
-not control copyright. Those thus making or running the covered works
-for you must do so exclusively on your behalf, under your direction
-and control, on terms that prohibit them from making any copies of
-your copyrighted material outside their relationship with you.
-
- Conveying under any other circumstances is permitted solely under
-the conditions stated below. Sublicensing is not allowed; section 10
-makes it unnecessary.
-
- 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
-
- No covered work shall be deemed part of an effective technological
-measure under any applicable law fulfilling obligations under article
-11 of the WIPO copyright treaty adopted on 20 December 1996, or
-similar laws prohibiting or restricting circumvention of such
-measures.
-
- When you convey a covered work, you waive any legal power to forbid
-circumvention of technological measures to the extent such circumvention
-is effected by exercising rights under this License with respect to
-the covered work, and you disclaim any intention to limit operation or
-modification of the work as a means of enforcing, against the work's
-users, your or third parties' legal rights to forbid circumvention of
-technological measures.
-
- 4. Conveying Verbatim Copies.
-
- You may convey verbatim copies of the Program's source code as you
-receive it, in any medium, provided that you conspicuously and
-appropriately publish on each copy an appropriate copyright notice;
-keep intact all notices stating that this License and any
-non-permissive terms added in accord with section 7 apply to the code;
-keep intact all notices of the absence of any warranty; and give all
-recipients a copy of this License along with the Program.
-
- You may charge any price or no price for each copy that you convey,
-and you may offer support or warranty protection for a fee.
-
- 5. Conveying Modified Source Versions.
-
- You may convey a work based on the Program, or the modifications to
-produce it from the Program, in the form of source code under the
-terms of section 4, provided that you also meet all of these conditions:
-
- a) The work must carry prominent notices stating that you modified
- it, and giving a relevant date.
-
- b) The work must carry prominent notices stating that it is
- released under this License and any conditions added under section
- 7. This requirement modifies the requirement in section 4 to
- "keep intact all notices".
-
- c) You must license the entire work, as a whole, under this
- License to anyone who comes into possession of a copy. This
- License will therefore apply, along with any applicable section 7
- additional terms, to the whole of the work, and all its parts,
- regardless of how they are packaged. This License gives no
- permission to license the work in any other way, but it does not
- invalidate such permission if you have separately received it.
-
- d) If the work has interactive user interfaces, each must display
- Appropriate Legal Notices; however, if the Program has interactive
- interfaces that do not display Appropriate Legal Notices, your
- work need not make them do so.
-
- A compilation of a covered work with other separate and independent
-works, which are not by their nature extensions of the covered work,
-and which are not combined with it such as to form a larger program,
-in or on a volume of a storage or distribution medium, is called an
-"aggregate" if the compilation and its resulting copyright are not
-used to limit the access or legal rights of the compilation's users
-beyond what the individual works permit. Inclusion of a covered work
-in an aggregate does not cause this License to apply to the other
-parts of the aggregate.
-
- 6. Conveying Non-Source Forms.
-
- You may convey a covered work in object code form under the terms
-of sections 4 and 5, provided that you also convey the
-machine-readable Corresponding Source under the terms of this License,
-in one of these ways:
-
- a) Convey the object code in, or embodied in, a physical product
- (including a physical distribution medium), accompanied by the
- Corresponding Source fixed on a durable physical medium
- customarily used for software interchange.
-
- b) Convey the object code in, or embodied in, a physical product
- (including a physical distribution medium), accompanied by a
- written offer, valid for at least three years and valid for as
- long as you offer spare parts or customer support for that product
- model, to give anyone who possesses the object code either (1) a
- copy of the Corresponding Source for all the software in the
- product that is covered by this License, on a durable physical
- medium customarily used for software interchange, for a price no
- more than your reasonable cost of physically performing this
- conveying of source, or (2) access to copy the
- Corresponding Source from a network server at no charge.
-
- c) Convey individual copies of the object code with a copy of the
- written offer to provide the Corresponding Source. This
- alternative is allowed only occasionally and noncommercially, and
- only if you received the object code with such an offer, in accord
- with subsection 6b.
-
- d) Convey the object code by offering access from a designated
- place (gratis or for a charge), and offer equivalent access to the
- Corresponding Source in the same way through the same place at no
- further charge. You need not require recipients to copy the
- Corresponding Source along with the object code. If the place to
- copy the object code is a network server, the Corresponding Source
- may be on a different server (operated by you or a third party)
- that supports equivalent copying facilities, provided you maintain
- clear directions next to the object code saying where to find the
- Corresponding Source. Regardless of what server hosts the
- Corresponding Source, you remain obligated to ensure that it is
- available for as long as needed to satisfy these requirements.
-
- e) Convey the object code using peer-to-peer transmission, provided
- you inform other peers where the object code and Corresponding
- Source of the work are being offered to the general public at no
- charge under subsection 6d.
-
- A separable portion of the object code, whose source code is excluded
-from the Corresponding Source as a System Library, need not be
-included in conveying the object code work.
-
- A "User Product" is either (1) a "consumer product", which means any
-tangible personal property which is normally used for personal, family,
-or household purposes, or (2) anything designed or sold for incorporation
-into a dwelling. In determining whether a product is a consumer product,
-doubtful cases shall be resolved in favor of coverage. For a particular
-product received by a particular user, "normally used" refers to a
-typical or common use of that class of product, regardless of the status
-of the particular user or of the way in which the particular user
-actually uses, or expects or is expected to use, the product. A product
-is a consumer product regardless of whether the product has substantial
-commercial, industrial or non-consumer uses, unless such uses represent
-the only significant mode of use of the product.
-
- "Installation Information" for a User Product means any methods,
-procedures, authorization keys, or other information required to install
-and execute modified versions of a covered work in that User Product from
-a modified version of its Corresponding Source. The information must
-suffice to ensure that the continued functioning of the modified object
-code is in no case prevented or interfered with solely because
-modification has been made.
-
- If you convey an object code work under this section in, or with, or
-specifically for use in, a User Product, and the conveying occurs as
-part of a transaction in which the right of possession and use of the
-User Product is transferred to the recipient in perpetuity or for a
-fixed term (regardless of how the transaction is characterized), the
-Corresponding Source conveyed under this section must be accompanied
-by the Installation Information. But this requirement does not apply
-if neither you nor any third party retains the ability to install
-modified object code on the User Product (for example, the work has
-been installed in ROM).
-
- The requirement to provide Installation Information does not include a
-requirement to continue to provide support service, warranty, or updates
-for a work that has been modified or installed by the recipient, or for
-the User Product in which it has been modified or installed. Access to a
-network may be denied when the modification itself materially and
-adversely affects the operation of the network or violates the rules and
-protocols for communication across the network.
-
- Corresponding Source conveyed, and Installation Information provided,
-in accord with this section must be in a format that is publicly
-documented (and with an implementation available to the public in
-source code form), and must require no special password or key for
-unpacking, reading or copying.
-
- 7. Additional Terms.
-
- "Additional permissions" are terms that supplement the terms of this
-License by making exceptions from one or more of its conditions.
-Additional permissions that are applicable to the entire Program shall
-be treated as though they were included in this License, to the extent
-that they are valid under applicable law. If additional permissions
-apply only to part of the Program, that part may be used separately
-under those permissions, but the entire Program remains governed by
-this License without regard to the additional permissions.
-
- When you convey a copy of a covered work, you may at your option
-remove any additional permissions from that copy, or from any part of
-it. (Additional permissions may be written to require their own
-removal in certain cases when you modify the work.) You may place
-additional permissions on material, added by you to a covered work,
-for which you have or can give appropriate copyright permission.
-
- Notwithstanding any other provision of this License, for material you
-add to a covered work, you may (if authorized by the copyright holders of
-that material) supplement the terms of this License with terms:
-
- a) Disclaiming warranty or limiting liability differently from the
- terms of sections 15 and 16 of this License; or
-
- b) Requiring preservation of specified reasonable legal notices or
- author attributions in that material or in the Appropriate Legal
- Notices displayed by works containing it; or
-
- c) Prohibiting misrepresentation of the origin of that material, or
- requiring that modified versions of such material be marked in
- reasonable ways as different from the original version; or
-
- d) Limiting the use for publicity purposes of names of licensors or
- authors of the material; or
-
- e) Declining to grant rights under trademark law for use of some
- trade names, trademarks, or service marks; or
-
- f) Requiring indemnification of licensors and authors of that
- material by anyone who conveys the material (or modified versions of
- it) with contractual assumptions of liability to the recipient, for
- any liability that these contractual assumptions directly impose on
- those licensors and authors.
-
- All other non-permissive additional terms are considered "further
-restrictions" within the meaning of section 10. If the Program as you
-received it, or any part of it, contains a notice stating that it is
-governed by this License along with a term that is a further
-restriction, you may remove that term. If a license document contains
-a further restriction but permits relicensing or conveying under this
-License, you may add to a covered work material governed by the terms
-of that license document, provided that the further restriction does
-not survive such relicensing or conveying.
-
- If you add terms to a covered work in accord with this section, you
-must place, in the relevant source files, a statement of the
-additional terms that apply to those files, or a notice indicating
-where to find the applicable terms.
-
- Additional terms, permissive or non-permissive, may be stated in the
-form of a separately written license, or stated as exceptions;
-the above requirements apply either way.
-
- 8. Termination.
-
- You may not propagate or modify a covered work except as expressly
-provided under this License. Any attempt otherwise to propagate or
-modify it is void, and will automatically terminate your rights under
-this License (including any patent licenses granted under the third
-paragraph of section 11).
-
- However, if you cease all violation of this License, then your
-license from a particular copyright holder is reinstated (a)
-provisionally, unless and until the copyright holder explicitly and
-finally terminates your license, and (b) permanently, if the copyright
-holder fails to notify you of the violation by some reasonable means
-prior to 60 days after the cessation.
-
- Moreover, your license from a particular copyright holder is
-reinstated permanently if the copyright holder notifies you of the
-violation by some reasonable means, this is the first time you have
-received notice of violation of this License (for any work) from that
-copyright holder, and you cure the violation prior to 30 days after
-your receipt of the notice.
-
- Termination of your rights under this section does not terminate the
-licenses of parties who have received copies or rights from you under
-this License. If your rights have been terminated and not permanently
-reinstated, you do not qualify to receive new licenses for the same
-material under section 10.
-
- 9. Acceptance Not Required for Having Copies.
-
- You are not required to accept this License in order to receive or
-run a copy of the Program. Ancillary propagation of a covered work
-occurring solely as a consequence of using peer-to-peer transmission
-to receive a copy likewise does not require acceptance. However,
-nothing other than this License grants you permission to propagate or
-modify any covered work. These actions infringe copyright if you do
-not accept this License. Therefore, by modifying or propagating a
-covered work, you indicate your acceptance of this License to do so.
-
- 10. Automatic Licensing of Downstream Recipients.
-
- Each time you convey a covered work, the recipient automatically
-receives a license from the original licensors, to run, modify and
-propagate that work, subject to this License. You are not responsible
-for enforcing compliance by third parties with this License.
-
- An "entity transaction" is a transaction transferring control of an
-organization, or substantially all assets of one, or subdividing an
-organization, or merging organizations. If propagation of a covered
-work results from an entity transaction, each party to that
-transaction who receives a copy of the work also receives whatever
-licenses to the work the party's predecessor in interest had or could
-give under the previous paragraph, plus a right to possession of the
-Corresponding Source of the work from the predecessor in interest, if
-the predecessor has it or can get it with reasonable efforts.
-
- You may not impose any further restrictions on the exercise of the
-rights granted or affirmed under this License. For example, you may
-not impose a license fee, royalty, or other charge for exercise of
-rights granted under this License, and you may not initiate litigation
-(including a cross-claim or counterclaim in a lawsuit) alleging that
-any patent claim is infringed by making, using, selling, offering for
-sale, or importing the Program or any portion of it.
-
- 11. Patents.
-
- A "contributor" is a copyright holder who authorizes use under this
-License of the Program or a work on which the Program is based. The
-work thus licensed is called the contributor's "contributor version".
-
- A contributor's "essential patent claims" are all patent claims
-owned or controlled by the contributor, whether already acquired or
-hereafter acquired, that would be infringed by some manner, permitted
-by this License, of making, using, or selling its contributor version,
-but do not include claims that would be infringed only as a
-consequence of further modification of the contributor version. For
-purposes of this definition, "control" includes the right to grant
-patent sublicenses in a manner consistent with the requirements of
-this License.
-
- Each contributor grants you a non-exclusive, worldwide, royalty-free
-patent license under the contributor's essential patent claims, to
-make, use, sell, offer for sale, import and otherwise run, modify and
-propagate the contents of its contributor version.
-
- In the following three paragraphs, a "patent license" is any express
-agreement or commitment, however denominated, not to enforce a patent
-(such as an express permission to practice a patent or covenant not to
-sue for patent infringement). To "grant" such a patent license to a
-party means to make such an agreement or commitment not to enforce a
-patent against the party.
-
- If you convey a covered work, knowingly relying on a patent license,
-and the Corresponding Source of the work is not available for anyone
-to copy, free of charge and under the terms of this License, through a
-publicly available network server or other readily accessible means,
-then you must either (1) cause the Corresponding Source to be so
-available, or (2) arrange to deprive yourself of the benefit of the
-patent license for this particular work, or (3) arrange, in a manner
-consistent with the requirements of this License, to extend the patent
-license to downstream recipients. "Knowingly relying" means you have
-actual knowledge that, but for the patent license, your conveying the
-covered work in a country, or your recipient's use of the covered work
-in a country, would infringe one or more identifiable patents in that
-country that you have reason to believe are valid.
-
- If, pursuant to or in connection with a single transaction or
-arrangement, you convey, or propagate by procuring conveyance of, a
-covered work, and grant a patent license to some of the parties
-receiving the covered work authorizing them to use, propagate, modify
-or convey a specific copy of the covered work, then the patent license
-you grant is automatically extended to all recipients of the covered
-work and works based on it.
-
- A patent license is "discriminatory" if it does not include within
-the scope of its coverage, prohibits the exercise of, or is
-conditioned on the non-exercise of one or more of the rights that are
-specifically granted under this License. You may not convey a covered
-work if you are a party to an arrangement with a third party that is
-in the business of distributing software, under which you make payment
-to the third party based on the extent of your activity of conveying
-the work, and under which the third party grants, to any of the
-parties who would receive the covered work from you, a discriminatory
-patent license (a) in connection with copies of the covered work
-conveyed by you (or copies made from those copies), or (b) primarily
-for and in connection with specific products or compilations that
-contain the covered work, unless you entered into that arrangement,
-or that patent license was granted, prior to 28 March 2007.
-
- Nothing in this License shall be construed as excluding or limiting
-any implied license or other defenses to infringement that may
-otherwise be available to you under applicable patent law.
-
- 12. No Surrender of Others' Freedom.
-
- If conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License. If you cannot convey a
-covered work so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you may
-not convey it at all. For example, if you agree to terms that obligate you
-to collect a royalty for further conveying from those to whom you convey
-the Program, the only way you could satisfy both those terms and this
-License would be to refrain entirely from conveying the Program.
-
- 13. Use with the GNU Affero General Public License.
-
- Notwithstanding any other provision of this License, you have
-permission to link or combine any covered work with a work licensed
-under version 3 of the GNU Affero General Public License into a single
-combined work, and to convey the resulting work. The terms of this
-License will continue to apply to the part which is the covered work,
-but the special requirements of the GNU Affero General Public License,
-section 13, concerning interaction through a network will apply to the
-combination as such.
-
- 14. Revised Versions of this License.
-
- The Free Software Foundation may publish revised and/or new versions of
-the GNU General Public License from time to time. Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
- Each version is given a distinguishing version number. If the
-Program specifies that a certain numbered version of the GNU General
-Public License "or any later version" applies to it, you have the
-option of following the terms and conditions either of that numbered
-version or of any later version published by the Free Software
-Foundation. If the Program does not specify a version number of the
-GNU General Public License, you may choose any version ever published
-by the Free Software Foundation.
-
- If the Program specifies that a proxy can decide which future
-versions of the GNU General Public License can be used, that proxy's
-public statement of acceptance of a version permanently authorizes you
-to choose that version for the Program.
-
- Later license versions may give you additional or different
-permissions. However, no additional obligations are imposed on any
-author or copyright holder as a result of your choosing to follow a
-later version.
-
- 15. Disclaimer of Warranty.
-
- THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
-APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
-HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
-OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
-IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
-ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
- 16. Limitation of Liability.
-
- IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
-THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
-GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
-USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
-DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
-PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
-EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGES.
-
- 17. Interpretation of Sections 15 and 16.
-
- If the disclaimer of warranty and limitation of liability provided
-above cannot be given local legal effect according to their terms,
-reviewing courts shall apply local law that most closely approximates
-an absolute waiver of all civil liability in connection with the
-Program, unless a warranty or assumption of liability accompanies a
-copy of the Program in return for a fee.
-
- END OF TERMS AND CONDITIONS
-
- How to Apply These Terms to Your New Programs
-
- If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
- To do so, attach the following notices to the program. It is safest
-to attach them to the start of each source file to most effectively
-state the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
- {one line to give the program's name and a brief idea of what it does.}
- Copyright (C) {year} {name of author}
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-Also add information on how to contact you by electronic and paper mail.
-
- If the program does terminal interaction, make it output a short
-notice like this when it starts in an interactive mode:
-
- {project} Copyright (C) {year} {fullname}
- This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
- This is free software, and you are welcome to redistribute it
- under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License. Of course, your program's commands
-might be different; for a GUI interface, you would use an "about box".
-
- You should also get your employer (if you work as a programmer) or school,
-if any, to sign a "copyright disclaimer" for the program, if necessary.
-For more information on this, and how to apply and follow the GNU GPL, see
-<http://www.gnu.org/licenses/>.
-
- The GNU General Public License does not permit incorporating your program
-into proprietary programs. If your program is a subroutine library, you
-may consider it more useful to permit linking proprietary applications with
-the library. If this is what you want to do, use the GNU Lesser General
-Public License instead of this License. But first, please read
-<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/lib/java-speech-api-master/README.markdown b/lib/java-speech-api-master/README.markdown
deleted file mode 100755
index f34fee4a..00000000
--- a/lib/java-speech-api-master/README.markdown
+++ /dev/null
@@ -1,30 +0,0 @@
-#J.A.R.V.I.S. (Java-Speech-API)
-
-J.A.R.V.I.S. Java Speech API: Just A Reliable Vocal Interpreter & Synthesizer.
-This is a project for the Java Speech API. The program interprets vocal inputs into text and synthesizes voices from text input.
-The program supports dozens of languages and even has the ability to auto-detect languages!
-
-## Description
-The J.A.R.V.I.S. Speech API is designed to be simple and efficient, using the speech engines created by Google
-to provide functionality for parts of the API. Essentially, it is an API written in Java,
-including a recognizer, synthesizer, and a microphone capture utility. The project uses
-Google services for the synthesizer and recognizer. While this requires an Internet
-connection, it provides a complete, modern, and fully functional speech API in Java.
-
-##Features
-The API currently provides the following functionality,
-
- * Microphone Capture API (Wrapped around the current Java API for simplicity)
- * A speech recognizer using Google's recognizer service
- * Converts WAVE files from microphone input to FLAC (using existing API, see CREDITS)
- * Retrieves Response from Google, including confidence score and text
- * A speech synthesiser using Google's synthesizer service
- * Retrieves synthesized text in an InputStream (MP3 data ready to be played)
- * Wave to FLAC API (Wrapped around the used API in the project, javaFlacEncoder, see CREDITS)
- * A translator using Google Translate (courtesy of Skylion's Google Toolkit)
-
-##Changelog
-See CHANGELOG.markdown for Version History/Changelog
-
-##Credits
-See CREDITS.markdown for Credits
diff --git a/lib/java-speech-api-master/java-speech-api.iml b/lib/java-speech-api-master/java-speech-api.iml
deleted file mode 100755
index ac3e4584..00000000
--- a/lib/java-speech-api-master/java-speech-api.iml
+++ /dev/null
@@ -1,13 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/lib/java-speech-api-master/src/META-INF/MANIFEST.MF b/lib/java-speech-api-master/src/META-INF/MANIFEST.MF
deleted file mode 100755
index 59499bce..00000000
--- a/lib/java-speech-api-master/src/META-INF/MANIFEST.MF
+++ /dev/null
@@ -1,2 +0,0 @@
-Manifest-Version: 1.0
-
diff --git a/lib/java-speech-api-master/src/com/darkprograms/speech/microphone/Microphone.java b/lib/java-speech-api-master/src/com/darkprograms/speech/microphone/Microphone.java
deleted file mode 100755
index fbc7adfb..00000000
--- a/lib/java-speech-api-master/src/com/darkprograms/speech/microphone/Microphone.java
+++ /dev/null
@@ -1,224 +0,0 @@
-package com.darkprograms.speech.microphone;
-
-import javax.sound.sampled.*;
-
-import java.io.Closeable;
-import java.io.File;
-
-/***************************************************************************
- * Microphone class that contains methods to capture audio from microphone
- *
- * @author Luke Kuza, Aaron Gokaslan
- ***************************************************************************/
-public class Microphone implements Closeable{
-
- /**
- * TargetDataLine variable to receive data from microphone
- */
- private TargetDataLine targetDataLine;
-
- /**
- * Enum for current Microphone state
- */
- public enum CaptureState {
- PROCESSING_AUDIO, STARTING_CAPTURE, CLOSED
- }
-
- /**
- * Variable for enum
- */
- CaptureState state;
-
- /**
- * Variable for the audios saved file type
- */
- private AudioFileFormat.Type fileType;
-
- /**
- * Variable that holds the saved audio file
- */
- private File audioFile;
-
- /**
- * Gets the current state of Microphone
- *
- * @return PROCESSING_AUDIO is returned when the Thread is recording Audio and/or saving it to a file
- * STARTING_CAPTURE is returned if the Thread is setting variables
- * CLOSED is returned if the Thread is not doing anything/not capturing audio
- */
- public CaptureState getState() {
- return state;
- }
-
- /**
- * Sets the current state of Microphone
- *
- * @param state State from enum
- */
- private void setState(CaptureState state) {
- this.state = state;
- }
-
- public File getAudioFile() {
- return audioFile;
- }
-
- public void setAudioFile(File audioFile) {
- this.audioFile = audioFile;
- }
-
- public AudioFileFormat.Type getFileType() {
- return fileType;
- }
-
- public void setFileType(AudioFileFormat.Type fileType) {
- this.fileType = fileType;
- }
-
- public TargetDataLine getTargetDataLine() {
- return targetDataLine;
- }
-
- public void setTargetDataLine(TargetDataLine targetDataLine) {
- this.targetDataLine = targetDataLine;
- }
-
-
- /**
- * Constructor
- *
- * @param fileType File type to save the audio in
- * Example, to save as WAVE use AudioFileFormat.Type.WAVE
- */
- public Microphone(AudioFileFormat.Type fileType) {
- setState(CaptureState.CLOSED);
- setFileType(fileType);
- initTargetDataLine();
- }
-
- /**
- * Initializes the target data line.
- */
- private void initTargetDataLine(){
- DataLine.Info dataLineInfo = new DataLine.Info(TargetDataLine.class, getAudioFormat());
- try {
- setTargetDataLine((TargetDataLine) AudioSystem.getLine(dataLineInfo));
- } catch (LineUnavailableException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- return;
- }
-
- }
-
-
- /**
- * Captures audio from the microphone and saves it a file
- *
- * @param audioFile The File to save the audio to
- * @throws LineUnavailableException
- * @throws Exception Throws an exception if something went wrong
- */
- public void captureAudioToFile(File audioFile) throws LineUnavailableException {
- setState(CaptureState.STARTING_CAPTURE);
- setAudioFile(audioFile);
-
- if(getTargetDataLine() == null){
- initTargetDataLine();
- }
-
- //Get Audio
- new Thread(new CaptureThread()).start();
-
-
- }
-
- /**
- * Captures audio from the microphone and saves it a file
- *
- * @param audioFile The fully path (String) to a file you want to save the audio in
- * @throws LineUnavailableException
- * @throws Exception Throws an exception if something went wrong
- */
- public void captureAudioToFile(String audioFile) throws LineUnavailableException {
- File file = new File(audioFile);
- captureAudioToFile(file);
- }
-
-
- /**
- * The audio format to save in
- *
- * @return Returns AudioFormat to be used later when capturing audio from microphone
- */
- public AudioFormat getAudioFormat() {
- float sampleRate = 8000.0F;
- //8000,11025,16000,22050,44100
- int sampleSizeInBits = 16;
- //8,16
- int channels = 1;
- //1,2
- boolean signed = true;
- //true,false
- boolean bigEndian = false;
- //true,false
- return new AudioFormat(sampleRate, sampleSizeInBits, channels, signed, bigEndian);
- }
-
- /**
- * Opens the microphone, starting the targetDataLine.
- * If it's already open, it does nothing.
- */
- public void open(){
- if(getTargetDataLine()==null){
- initTargetDataLine();
- }
- if(!getTargetDataLine().isOpen() && !getTargetDataLine().isRunning() && !getTargetDataLine().isActive()){
- try {
- setState(CaptureState.PROCESSING_AUDIO);
- getTargetDataLine().open(getAudioFormat());
- getTargetDataLine().start();
- } catch (LineUnavailableException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- return;
- }
- }
-
- }
-
- /**
- * Close the microphone capture, saving all processed audio to the specified file.
- * If already closed, this does nothing
- */
- public void close() {
- if (getState() == CaptureState.CLOSED) {
- } else {
- getTargetDataLine().stop();
- getTargetDataLine().close();
- setState(CaptureState.CLOSED);
- }
- }
-
- /**
- * Thread to capture the audio from the microphone and save it to a file
- */
- private class CaptureThread implements Runnable {
-
- /**
- * Run method for thread
- */
- public void run() {
- try {
- AudioFileFormat.Type fileType = getFileType();
- File audioFile = getAudioFile();
- open();
- AudioSystem.write(new AudioInputStream(getTargetDataLine()), fileType, audioFile);
- //Will write to File until it's closed.
- } catch (Exception ex) {
- ex.printStackTrace();
- }
- }
- }
-
-}
diff --git a/lib/java-speech-api-master/src/com/darkprograms/speech/microphone/MicrophoneAnalyzer.java b/lib/java-speech-api-master/src/com/darkprograms/speech/microphone/MicrophoneAnalyzer.java
deleted file mode 100755
index 158546ca..00000000
--- a/lib/java-speech-api-master/src/com/darkprograms/speech/microphone/MicrophoneAnalyzer.java
+++ /dev/null
@@ -1,288 +0,0 @@
-package com.darkprograms.speech.microphone;
-
-import javax.sound.sampled.AudioFileFormat;
-import com.darkprograms.speech.util.*;
-
-/********************************************************************************************
- * Microphone Analyzer class, detects pitch and volume while extending the microphone class.
- * Implemented as a precursor to a Voice Activity Detection (VAD) algorithm.
- * Currently can be used for audio data analysis.
- * Dependencies: FFT.java & Complex.java. Both found in the utility package.
- * @author Aaron Gokaslan
- ********************************************************************************************/
-
-public class MicrophoneAnalyzer extends Microphone {
-
- /**
- * Constructor
- * @param fileType The file type you want to save in. FLAC recommended.
- */
- public MicrophoneAnalyzer(AudioFileFormat.Type fileType){
- super(fileType);
- }
-
- /**
- * Gets the volume of the microphone input
- * Interval is 100ms so allow 100ms for this method to run in your code or specify smaller interval.
- * @return The volume of the microphone input or -1 if data-line is not available
- */
- public int getAudioVolume(){
- return getAudioVolume(100);
- }
-
- /**
- * Gets the volume of the microphone input
- * @param interval: The length of time you would like to calculate the volume over in milliseconds.
- * @return The volume of the microphone input or -1 if data-line is not available.
- */
- public int getAudioVolume(int interval){
- return calculateAudioVolume(this.getNumOfBytes(interval/1000d));
- }
-
- /**
- * Gets the volume of microphone input
- * @param numOfBytes The number of bytes you want for volume interpretation
- * @return The volume over the specified number of bytes or -1 if data-line is unavailable.
- */
- private int calculateAudioVolume(int numOfBytes){
- byte[] data = getBytes(numOfBytes);
- if(data==null)
- return -1;
- return calculateRMSLevel(data);
- }
-
- /**
- * Calculates the volume of AudioData which may be buffered data from a data-line.
- * @param audioData The byte[] you want to determine the volume of
- * @return the calculated volume of audioData
- */
- public static int calculateRMSLevel(byte[] audioData){
- long lSum = 0;
- for(int i=0; imax){
- max = tmp;;
- index = i;
- }
- }
- return index;
- }
-
- /**
- * Converts bytes from a TargetDataLine into a double[] allowing the information to be read.
- * NOTE: One byte is lost in the conversion so don't expect the arrays to be the same length!
- * @param bufferData The buffer read in from the target data line
- * @return The double[] that the buffer has been converted into.
- */
- private double[] bytesToDoubleArray(byte[] bufferData){
- final int bytesRecorded = bufferData.length;
- final int bytesPerSample = getAudioFormat().getSampleSizeInBits()/8;
- final double amplification = 100.0; // choose a number as you like
- double[] micBufferData = new double[bytesRecorded - bytesPerSample +1];
- for (int index = 0, floatIndex = 0; index < bytesRecorded - bytesPerSample + 1; index += bytesPerSample, floatIndex++) {
- double sample = 0;
- for (int b = 0; b < bytesPerSample; b++) {
- int v = bufferData[index + b];
- if (b < bytesPerSample - 1 || bytesPerSample == 1) {
- v &= 0xFF;
- }
- sample += v << (b * 8);
- }
- double sample32 = amplification * (sample / 32768.0);
- micBufferData[floatIndex] = sample32;
-
- }
- return micBufferData;
- }
-
-}
diff --git a/lib/java-speech-api-master/src/com/darkprograms/speech/recognizer/FlacEncoder.java b/lib/java-speech-api-master/src/com/darkprograms/speech/recognizer/FlacEncoder.java
deleted file mode 100755
index c52f502a..00000000
--- a/lib/java-speech-api-master/src/com/darkprograms/speech/recognizer/FlacEncoder.java
+++ /dev/null
@@ -1,120 +0,0 @@
-package com.darkprograms.speech.recognizer;
-
-import javaFlacEncoder.FLACEncoder;
-import javaFlacEncoder.FLACFileOutputStream;
-import javaFlacEncoder.StreamConfiguration;
-
-import javax.sound.sampled.AudioFormat;
-import javax.sound.sampled.AudioInputStream;
-import javax.sound.sampled.AudioSystem;
-import java.io.File;
-import java.nio.ByteBuffer;
-import java.nio.ByteOrder;
-
-/*************************************************************************************************************
- * Class that contains methods to encode Wave files to FLAC files
- * THIS IS THANKS TO THE javaFlacEncoder Project created here: http://sourceforge.net/projects/javaflacencoder/
- ************************************************************************************************************/
-public class FlacEncoder {
-
- /**
- * Constructor
- */
- public FlacEncoder() {
-
- }
-
- /**
- * Converts a wave file to a FLAC file(in order to POST the data to Google and retrieve a response)
- * Sample Rate is 8000 by default
- *
- * @param inputFile Input wave file
- * @param outputFile Output FLAC file
- */
- public void convertWaveToFlac(File inputFile, File outputFile) {
-
-
- StreamConfiguration streamConfiguration = new StreamConfiguration();
- streamConfiguration.setSampleRate(8000);
- streamConfiguration.setBitsPerSample(16);
- streamConfiguration.setChannelCount(1);
-
-
- try {
- AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(inputFile);
- AudioFormat format = audioInputStream.getFormat();
-
- int frameSize = format.getFrameSize();
-
- FLACEncoder flacEncoder = new FLACEncoder();
- FLACFileOutputStream flacOutputStream = new FLACFileOutputStream(outputFile);
-
- flacEncoder.setStreamConfiguration(streamConfiguration);
- flacEncoder.setOutputStream(flacOutputStream);
-
- flacEncoder.openFLACStream();
-
- int frameLength = (int) audioInputStream.getFrameLength();
- if(frameLength <= AudioSystem.NOT_SPECIFIED){
- frameLength = 16384;//Arbitrary file size
- }
- int[] sampleData = new int[frameLength];
- byte[] samplesIn = new byte[frameSize];
-
- int i = 0;
-
- while (audioInputStream.read(samplesIn, 0, frameSize) != -1) {
- if (frameSize != 1) {
- ByteBuffer bb = ByteBuffer.wrap(samplesIn);
- bb.order(ByteOrder.LITTLE_ENDIAN);
- short shortVal = bb.getShort();
- sampleData[i] = shortVal;
- } else {
- sampleData[i] = samplesIn[0];
- }
-
- i++;
- }
-
- sampleData = truncateNullData(sampleData, i);
-
- flacEncoder.addSamples(sampleData, i);
- flacEncoder.encodeSamples(i, false);
- flacEncoder.encodeSamples(flacEncoder.samplesAvailableToEncode(), true);
-
- audioInputStream.close();
- flacOutputStream.close();
-
- } catch (Exception ex) {
- ex.printStackTrace();
- }
- }
-
-
- /**
- * Converts a wave file to a FLAC file(in order to POST the data to Google and retrieve a response)
- * Sample Rate is 8000 by default
- *
- * @param inputFile Input wave file
- * @param outputFile Output FLAC file
- */
- public void convertWaveToFlac(String inputFile, String outputFile) {
- convertWaveToFlac(new File(inputFile), new File(outputFile));
- }
-
- /**
- * Used for when the frame length is unknown to shorten the array to prevent huge blank end space
- * @param sampleData The int[] array you want to shorten
- * @param index The index you want to shorten it to
- * @return The shortened array
- */
- private int[] truncateNullData(int[] sampleData, int index){
- if(index == sampleData.length) return sampleData;
- int[] out = new int[index];
- for(int i = 0; i responseListeners = new ArrayList();
-
- /**
- * User defined API-KEY
- */
- private final String API_KEY;
-
- /**
- * User-defined language
- */
- private String language = "auto";
-
- /**
- * The maximum size the API will tolerate
- */
- private final static int MAX_SIZE = 1048576;
-
- /**
- * Per specification, the final chunk of in a ChunkedOutputStream
- */
- private final static byte[] FINAL_CHUNK = new byte[] { '0', '\r', '\n', '\r', '\n' };
-
- /**
- * Constructor
- * @param API_KEY The API-Key for Google's Speech API. An API key can be obtained by requesting
- * one by following the process shown at this
- * <a href="http://www.chromium.org/developers/how-tos/api-keys">url</a>.
- */
- public GSpeechDuplex(String API_KEY){
- this.API_KEY = API_KEY;
- }
-
- /**
- * Temporary will be deprecated before release
- */
- public String getLanguage(){
- return language;
- }
-
- /**
- * Temporary will be deprecated before release
- */
- public void setLanguage(String language){
- this.language = language;
- }
-
- /**
- * Send a FLAC file with the specified sampleRate to the Duplex API
- * @param flacFile The file you wish to upload.
- * NOTE: Segment the file if duration is greater than 15 seconds.
- * @param sampleRate The sample rate of the file.
- * @throws IOException If something has gone wrong with reading the file
- */
- public void recognize(File flacFile, int sampleRate) throws IOException{
- recognize(mapFileIn(flacFile), sampleRate);
- }
-
- /**
- * Send a byte[] to the URL with a specified sampleRate.
- * NOTE: The byte[] should contain no more than 15 seconds of audio.
- * Chunking is not fully implemented as of yet. Will not string data together for context yet.
- * @param data The byte[] you want to send.
- * @param sampleRate The sample rate of aforementioned byte array.
- */
- public void recognize(byte[] data, int sampleRate){
-
- if(data.length >= MAX_SIZE){//Temporary Chunking. Does not allow for Google to gather context.
- System.out.println("Chunking the audio into smaller parts...");
- byte[][] dataArray = chunkAudio(data);
- for(byte[]array: dataArray){
- recognize(array, sampleRate);
- }
- }
-
- //Generates a unique ID for the response.
- final long PAIR = MIN + (long)(Math.random() * ((MAX - MIN) + 1L));
-
- //Generates the Downstream URL
- final String API_DOWN_URL = GOOGLE_DUPLEX_SPEECH_BASE + "down?maxresults=1&pair=" + PAIR;
-
- //Generates the Upstream URL
- final String API_UP_URL = GOOGLE_DUPLEX_SPEECH_BASE +
- "up?lang=" + language + "&lm=dictation&client=chromium&pair=" + PAIR +
- "&key=" + API_KEY ;
-
- //Opens downChannel
- this.downChannel(API_DOWN_URL);
- //Opens upChannel
- this.upChannel(API_UP_URL, chunkAudio(data), sampleRate);
- }
-
- /**
- * This method allows you to stream a continuous stream of data to the API.
- * <p>Note: This feature is experimental.</p>
- * @param tl
- * @param af
- * @throws IOException
- * @throws LineUnavailableException
- */
- public void recognize(TargetDataLine tl, AudioFormat af) throws IOException, LineUnavailableException{
- //Generates a unique ID for the response.
- final long PAIR = MIN + (long)(Math.random() * ((MAX - MIN) + 1L));
-
- //Generates the Downstream URL
- final String API_DOWN_URL = GOOGLE_DUPLEX_SPEECH_BASE + "down?maxresults=1&pair=" + PAIR;
-
- //Generates the Upstream URL
- final String API_UP_URL = GOOGLE_DUPLEX_SPEECH_BASE +
- "up?lang=" + language + "&lm=dictation&client=chromium&pair=" + PAIR +
- "&key=" + API_KEY + "&continuous"; //Tells Google to constantly monitor the stream;
-
- //TODO Add implementation that sends feedback in real time. Protocol buffers will be necessary.
-
- //Opens downChannel
- this.downChannel(API_DOWN_URL);
- //Opens upChannel
- this.upChannel(API_UP_URL, tl, af);
- }
-
- /**
- * This code opens a new Thread that connects to the downstream URL. Due to threading,
- * the best way to handle this is through the use of listeners.
- * @param The URL you want to connect to.
- */
- private void downChannel(String urlStr) {
- final String url = urlStr;
- new Thread ("Downstream Thread") {
- public void run() {
- // handler for DOWN channel http response stream - httpsUrlConn
- // response handler should manage the connection.... ??
- // assign a TIMEOUT Value that exceeds by a safe factor
- // the amount of time that it will take to write the bytes
- // to the UPChannel in a fashion that mimics a liveStream
- // of the audio at the applicable Bitrate. BR=sampleRate * bits per sample
- // Note that the TLS session uses "* SSLv3, TLS alert, Client hello (1): "
- // to wake up the listener when there are additional bytes.
- // The mechanics of the TLS session should be transparent. Just use
- // httpsUrlConn and allow it enough time to do its work.
- Scanner inStream = openHttpsConnection(url);
- if(inStream == null){
- //ERROR HAS OCCURED
- }
- while(inStream.hasNextLine()){
- String response = inStream.nextLine();
- System.out.println("Response: "+response);
- if(response.length()>17){//Prevents blank responses from Firing
- GoogleResponse gr = new GoogleResponse();
- parseResponse(response, gr);
- fireResponseEvent(gr);
- }
-
- }
- inStream.close();
- System.out.println("Finished write on down stream...");
- }
- }.start();
- }
-
-
- /**
- * Used to initiate the URL chunking for the upChannel.
- * @param urlStr The URL string you want to upload 2
- * @param data The data you want to send to the URL
- * @param sampleRate The specified sample rate of the data.
- */
- private void upChannel(String urlStr, byte[][] data, int sampleRate) {
- final String murl = urlStr;
- final byte[][] mdata = data;
- final int mSampleRate = sampleRate;
- new Thread ("Upstream File Thread") {
- public void run() {
- openHttpsPostConnection(murl, mdata, mSampleRate);
- //Google does not return data via this URL
- }
- }.start();
- }
-
- /**
- * Streams data from the TargetDataLine to the API.
- * @param urlStr The URL to stream to
- * @param tl The target data line to stream from.
- * @param af The AudioFormat to stream with.
- * @throws LineUnavailableException If cannot open or stream the TargetDataLine.
- */
- private void upChannel(String urlStr, TargetDataLine tl, AudioFormat af) throws LineUnavailableException{
- final String murl = urlStr;
- final TargetDataLine mtl = tl;
- final AudioFormat maf = af;
- if(!mtl.isOpen()){
- mtl.open(maf);
- mtl.start();
- }
- new Thread ("Upstream Thread") {
- public void run() {
- openHttpsPostConnection(murl, mtl, maf);
- }
-
- }.start();
-
- }
-
- /**
- * Opens a HTTPS connection to the specified URL string
- * @param urlStr The URL you want to visit
- * @return The Scanner to access aforementioned data.
- */
- private Scanner openHttpsConnection(String urlStr) {
- int resCode = -1;
- try {
-
-
- URL url = new URL(urlStr);
- URLConnection urlConn = url.openConnection();
- if (!(urlConn instanceof HttpsURLConnection)) {
- throw new IOException ("URL is not an Https URL");
- }
- HttpsURLConnection httpConn = (HttpsURLConnection)urlConn;
- httpConn.setAllowUserInteraction(false);
- // TIMEOUT is required
- httpConn.setInstanceFollowRedirects(true);
- httpConn.setRequestMethod("GET");
-
- httpConn.connect();
- resCode = httpConn.getResponseCode();
- if (resCode == HttpsURLConnection.HTTP_OK) {
- return new Scanner(httpConn.getInputStream());
- }
- else{
- System.out.println("Error: " + resCode);
- }
- } catch (MalformedURLException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- }
- return null;
- }
-
- /**
- * Opens a HTTPSPostConnection that posts data from a TargetDataLine input
- * @param murl The URL you want to post to.
- * @param mtl The TargetDataLine you want to post data from. Note should be open
- * @param maf The AudioFormat of the data you want to post
- */
- private void openHttpsPostConnection(final String murl,
- final TargetDataLine mtl, final AudioFormat maf) {
- URL url;
- try {
- url = new URL(murl);
- URLConnection urlConn = url.openConnection();
- if (!(urlConn instanceof HttpsURLConnection)) {
- throw new IOException ("URL is not an Https URL");
- }
- HttpsURLConnection httpConn = (HttpsURLConnection)urlConn;
- httpConn.setAllowUserInteraction(false);
- httpConn.setInstanceFollowRedirects(true);
- httpConn.setRequestMethod("POST");
- httpConn.setDoOutput(true);
- httpConn.setChunkedStreamingMode(0);
- httpConn.setRequestProperty("Transfer-Encoding", "chunked");
- httpConn.setRequestProperty("Content-Type", "audio/x-flac; rate=" + (int)maf.getSampleRate());
- // also worked with ("Content-Type", "audio/amr; rate=8000");
- httpConn.connect();
-
- // this opens a connection, then sends POST & headers.
- OutputStream out = httpConn.getOutputStream();
- //Note : if the audio is more than 15 seconds
- // dont write it to UrlConnInputStream all in one block as this sample does.
- // Rather, segment the byteArray and on intermittently, sleeping thread
- // supply bytes to the urlConn Stream at a rate that approaches
- // the bitrate ( =30K per sec. in this instance ).
- System.out.println("Starting to write data to output...");
- AudioInputStream ais = new AudioInputStream(mtl);
- ChunkedOutputStream os = new ChunkedOutputStream(out);
- AudioSystem.write(ais, FLACFileWriter.FLAC, os);
- out.write(FINAL_CHUNK);
- System.out.println("IO WRITE DONE");
- out.close();
- // do you need the trailer?
- // NOW you can look at the status.
- int resCode = httpConn.getResponseCode();
- if (resCode / 100 != 2) {
- System.out.println("ERROR");
- }
- }catch(Exception ex){
- ex.printStackTrace();
-
- }
- }
-
- /**
- * Opens a chunked HTTPS post connection and returns a Scanner with incoming data from Google Server
- * Used for to get UPStream
- * Chunked HTTPS ensures unlimited file size.
- * @param urlStr The String for the URL
- * @param data The data you want to send the server
- * @param sampleRate The sample rate of the flac file.
- * @return A Scanner to access the server response. (Probably will never be used)
- */
- private Scanner openHttpsPostConnection(String urlStr, byte[][] data, int sampleRate){
- byte[][] mextrad = data;
- int resCode = -1;
- OutputStream out = null;
- // int http_status;
- try {
- URL url = new URL(urlStr);
- URLConnection urlConn = url.openConnection();
- if (!(urlConn instanceof HttpsURLConnection)) {
- throw new IOException ("URL is not an Https URL");
- }
- HttpsURLConnection httpConn = (HttpsURLConnection)urlConn;
- httpConn.setAllowUserInteraction(false);
- httpConn.setInstanceFollowRedirects(true);
- httpConn.setRequestMethod("POST");
- httpConn.setDoOutput(true);
- httpConn.setChunkedStreamingMode(0);
- httpConn.setRequestProperty("Transfer-Encoding", "chunked");
- httpConn.setRequestProperty("Content-Type", "audio/x-flac; rate=" + sampleRate);
- // also worked with ("Content-Type", "audio/amr; rate=8000");
- httpConn.connect();
- try {
- // this opens a connection, then sends POST & headers.
- out = httpConn.getOutputStream();
- //Note : if the audio is more than 15 seconds
- // dont write it to UrlConnInputStream all in one block as this sample does.
- // Rather, segment the byteArray and on intermittently, sleeping thread
- // supply bytes to the urlConn Stream at a rate that approaches
- // the bitrate ( =30K per sec. in this instance ).
- System.out.println("Starting to write");
- for(byte[] dataArray: mextrad){
- out.write(dataArray); // one big block supplied instantly to the underlying chunker wont work for duration > 15 s.
- try {
- Thread.sleep(1000);//Delays the Audio so Google thinks its a mic.
- } catch (InterruptedException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
- }
- out.write(FINAL_CHUNK);
- System.out.println("IO WRITE DONE");
- // do you need the trailer?
- // NOW you can look at the status.
- resCode = httpConn.getResponseCode();
- if (resCode / 100 != 2) {
- System.out.println("ERROR");
- }
- } catch (IOException e) {
-
- }
- if (resCode == HttpsURLConnection.HTTP_OK) {
- return new Scanner(httpConn.getInputStream());
- }
- else{
- System.out.println("HELP: " + resCode);
- }
- } catch (MalformedURLException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- }
- return null;
- }
-
- /**
- * Converts the file into a byte[]. Also Android compatible. :)
- * @param The File you want to get the byte[] from.
- * @return The byte[]
- * @throws IOException if something goes wrong in reading the file.
- */
- private byte[] mapFileIn(File infile) throws IOException{
- return Files.readAllBytes(infile.toPath());
- }
-
- /**
- * Parses the String into a GoogleResponse object
- * @param rawResponse The String you want to parse
- * @param gr the GoogleResponse object to save the data into.
- */
- private void parseResponse(String rawResponse, GoogleResponse gr){
- if(rawResponse == null || !rawResponse.contains("\"result\"")
- || rawResponse.equals("{\"result\":[]}")){ return; }
- if(rawResponse.contains("\"confidence\":")){
- String confidence = StringUtil.substringBetween(rawResponse, "\"confidence\":", "}");
- gr.setConfidence(confidence);
- }
- else{
- gr.setConfidence(String.valueOf(1d));
- }
- String array = StringUtil.trimString(rawResponse, "[", "]");
- if(array.contains("[")){
- array = StringUtil.trimString(array, "[", "]");
- }
- if(array.contains("\"confidence\":")){//Removes confidence phrase if it exists.
- array = array.substring(0, array.lastIndexOf(','));
- }
- String[] parts = array.split(",");
- gr.setResponse(parseTranscript(parts[0]));
- for(int i = 1; i= MAX_SIZE){//If larger than 1MB
- int frame = MAX_SIZE/2;
- int numOfChunks = (int)(data.length/((double)frame)) + 1;
- byte[][] data2D = new byte[numOfChunks][];
- for(int i = 0, j = 0; i otherPossibleResponses = new ArrayList(20);
-
- /**
- * Constructor
- */
- public GoogleResponse() {
-
- }
-
-
- /**
- * Gets the response text of what was said in the submitted Audio to Google
- *
- * @return String representation of what was said
- */
- public String getResponse() {
- return response;
- }
-
- /**
- * Set the response
- *
- * @param response The response
- */
- protected void setResponse(String response) {
- this.response = response;
- }
-
- /**
- * Gets the confidence score for the specific request
- *
- * @return The confidence score, ex .922343324323
- */
- public String getConfidence() {
- return confidence;
- }
-
- /**
- * Set the confidence score for this request
- *
- * @param confidence The confidence score
- */
- protected void setConfidence(String confidence) {
- this.confidence = confidence;
- }
-
- /**
- * Get other possible responses for this request.
- * @return other possible responses
- */
- public List getOtherPossibleResponses() {
- return otherPossibleResponses;
- }
-
- /**
- * Gets all returned responses for this request
- * @return All returned responses
- */
- public List getAllPossibleResponses() {
- List tmp = otherPossibleResponses;
- tmp.add(0,response);
- return tmp;
- }
-
-}
diff --git a/lib/java-speech-api-master/src/com/darkprograms/speech/recognizer/Recognizer.java b/lib/java-speech-api-master/src/com/darkprograms/speech/recognizer/Recognizer.java
deleted file mode 100755
index f5d6ab16..00000000
--- a/lib/java-speech-api-master/src/com/darkprograms/speech/recognizer/Recognizer.java
+++ /dev/null
@@ -1,466 +0,0 @@
-package com.darkprograms.speech.recognizer;
-
-import java.io.*;
-import java.net.URL;
-import java.net.URLConnection;
-import java.nio.charset.Charset;
-
-import com.darkprograms.speech.util.StringUtil;
-
-/***************************************************************
- * Class that submits FLAC audio and retrieves recognized text
- *
- * @author Luke Kuza, Duncan Jauncey, Aaron Gokaslan
- **************************************************************/
-@Deprecated
-public class Recognizer {
-
- @Deprecated
- public enum Languages{
- AUTO_DETECT("auto"),//tells Google to auto-detect the language
- ARABIC_JORDAN("ar-JO"),
- ARABIC_LEBANON("ar-LB"),
- ARABIC_QATAR("ar-QA"),
- ARABIC_UAE("ar-AE"),
- ARABIC_MOROCCO("ar-MA"),
- ARABIC_IRAQ("ar-IQ"),
- ARABIC_ALGERIA("ar-DZ"),
- ARABIC_BAHRAIN("ar-BH"),
- ARABIC_LYBIA("ar-LY"),
- ARABIC_OMAN("ar-OM"),
- ARABIC_SAUDI_ARABIA("ar-SA"),
- ARABIC_TUNISIA("ar-TN"),
- ARABIC_YEMEN("ar-YE"),
- BASQUE("eu"),
- CATALAN("ca"),
- CZECH("cs"),
- DUTCH("nl-NL"),
- ENGLISH_AUSTRALIA("en-AU"),
- ENGLISH_CANADA("en-CA"),
- ENGLISH_INDIA("en-IN"),
- ENGLISH_NEW_ZEALAND("en-NZ"),
- ENGLISH_SOUTH_AFRICA("en-ZA"),
- ENGLISH_UK("en-GB"),
- ENGLISH_US("en-US"),
- FINNISH("fi"),
- FRENCH("fr-FR"),
- GALICIAN("gl"),
- GERMAN("de-DE"),
- HEBREW("he"),
- HUNGARIAN("hu"),
- ICELANDIC("is"),
- ITALIAN("it-IT"),
- INDONESIAN("id"),
- JAPANESE("ja"),
- KOREAN("ko"),
- LATIN("la"),
- CHINESE_SIMPLIFIED("zh-CN"),
- CHINESE_TRANDITIONAL("zh-TW"),
- CHINESE_HONGKONG("zh-HK"),
- CHINESE_CANTONESE("zh-yue"),
- MALAYSIAN("ms-MY"),
- NORWEGIAN("no-NO"),
- POLISH("pl"),
- PIG_LATIN("xx-piglatin"),
- PORTUGUESE("pt-PT"),
- PORTUGUESE_BRASIL("pt-BR"),
- ROMANIAN("ro-RO"),
- RUSSIAN("ru"),
- SERBIAN("sr-SP"),
- SLOVAK("sk"),
- SPANISH_ARGENTINA("es-AR"),
- SPANISH_BOLIVIA("es-BO"),
- SPANISH_CHILE("es-CL"),
- SPANISH_COLOMBIA("es-CO"),
- SPANISH_COSTA_RICA("es-CR"),
- SPANISH_DOMINICAN_REPUBLIC("es-DO"),
- SPANISH_ECUADOR("es-EC"),
- SPANISH_EL_SALVADOR("es-SV"),
- SPANISH_GUATEMALA("es-GT"),
- SPANISH_HONDURAS("es-HN"),
- SPANISH_MEXICO("es-MX"),
- SPANISH_NICARAGUA("es-NI"),
- SPANISH_PANAMA("es-PA"),
- SPANISH_PARAGUAY("es-PY"),
- SPANISH_PERU("es-PE"),
- SPANISH_PUERTO_RICO("es-PR"),
- SPANISH_SPAIN("es-ES"),
- SPANISH_US("es-US"),
- SPANISH_URUGUAY("es-UY"),
- SPANISH_VENEZUELA("es-VE"),
- SWEDISH("sv-SE"),
- TURKISH("tr"),
- ZULU("zu");
-
- //TODO Clean Up JavaDoc for Overloaded Methods using @link
-
- /**
- *Stores the LanguageCode
- */
- private final String languageCode;
-
- /**
- *Constructor
- */
- private Languages(final String languageCode){
- this.languageCode = languageCode;
- }
-
- public String toString(){
- return languageCode;
- }
-
- }
-
-
- /**
- * URL to POST audio data and retrieve results
- */
- private static final String GOOGLE_RECOGNIZER_URL = "https://www.google.com/speech-api/v1/recognize?xjerr=1&client=chromium";
-
- private boolean profanityFilter = true;
- private String language = null;
-
- /**
- * Constructor
- */
- public Recognizer() {
- this.setLanguage(Languages.AUTO_DETECT);
- }
-
- /**
- * Constructor
- * @param Language
- */
- @Deprecated
- public Recognizer(String language) {
- this.language = language;
- }
-
- /**
- * Constructor
- * @param language The Languages class for the language you want to designate
- */
- public Recognizer(Languages language){
- this.language = language.languageCode;
- }
-
- /**
- * Constructor
- * @param profanityFilter
- */
- public Recognizer(boolean profanityFilter){
- this.profanityFilter = profanityFilter;
- }
-
- /**
- * Constructor
- * @param language
- * @param profanityFilter
- */
- @Deprecated
- public Recognizer(String language, boolean profanityFilter){
- this.language = language;
- this.profanityFilter = profanityFilter;
- }
-
- /**
- * Constructor
- * @param language
- * @param profanityFilter
- */
- public Recognizer(Languages language, boolean profanityFilter){
- this.language = language.languageCode;
- this.profanityFilter = profanityFilter;
- }
-
- /**
- * Language: Contains all supported languages for Google Speech to Text.
- * Setting this to null will make Google use it's own language detection.
- * This value is null by default.
- * @param language
- */
- public void setLanguage(Languages language) {
- this.language = language.languageCode;
- }
-
- /**Language code. This language code must match the language of the speech to be recognized. ex. en-US ru-RU
- * This value is null by default.
- * @param language The language code.
- */
- @Deprecated
- public void setLanguage(String language) {
- this.language = language;
- }
-
- /**
- * Returns the state of profanityFilter
- * which enables/disables Google's profanity filter (on by default).
- * @return profanityFilter
- */
- public boolean getProfanityFilter(){
- return profanityFilter;
- }
-
- /**
- * Language code. This language code must match the language of the speech to be recognized. ex. en-US ru-RU
- * This value is null by default.
- * @return language the Google language
- */
- public String getLanguage(){
- return language;
- }
-
- /**
- * Get recognized data from a Wave file. This method will encode the wave file to a FLAC file
- *
- * @param waveFile Wave file to recognize
- * @param maxResults Maximum number of results to return in response
- * @return Returns a GoogleResponse, with the response and confidence score
- * @throws IOException Throws exception if something goes wrong
- */
- public GoogleResponse getRecognizedDataForWave(File waveFile, int maxResults) throws IOException{
- FlacEncoder flacEncoder = new FlacEncoder();
- File flacFile = new File(waveFile + ".flac");
-
- flacEncoder.convertWaveToFlac(waveFile, flacFile);
-
- GoogleResponse googleResponse = getRecognizedDataForFlac(flacFile, maxResults, 8000);
-
- //Delete converted FLAC data
- flacFile.delete();
- return googleResponse;
- }
-
- /**
- * Get recognized data from a Wave file. This method will encode the wave file to a FLAC
- *
- * @param waveFile Wave file to recognize
- * @param maxResults the maximum number of results to return in the response
- * NOTE: Sample rate of file must be 8000 unless a custom sample rate is specified.
- * @return Returns a GoogleResponse, with the response and confidence score
- * @throws IOException Throws exception if something goes wrong
- */
- public GoogleResponse getRecognizedDataForWave(String waveFile, int maxResults) throws IOException {
- return getRecognizedDataForWave(new File(waveFile), maxResults);
- }
-
- /**
- * Get recognized data from a FLAC file.
- *
- * @param flacFile FLAC file to recognize
- * @param maxResults the maximum number of results to return in the response
- * NOTE: Sample rate of file must be 8000 unless a custom sample rate is specified.
- * @return Returns a GoogleResponse, with the response and confidence score
- * @throws IOException Throws exception if something goes wrong
- */
- public GoogleResponse getRecognizedDataForFlac(File flacFile, int maxResults) throws IOException {
- return getRecognizedDataForFlac(flacFile, maxResults, 8000);
- }
-
- /**
- * Get recognized data from a FLAC file.
- *
- * @param flacFile FLAC file to recognize
- * @param maxResults the maximum number of results to return in the response
- * @param samepleRate The sampleRate of the file. Default is 8000.
- * @return Returns a GoogleResponse, with the response and confidence score
- * @throws IOException Throws exception if something goes wrong
- */
- public GoogleResponse getRecognizedDataForFlac(File flacFile, int maxResults, int sampleRate) throws IOException{
- String response = rawRequest(flacFile, maxResults, sampleRate);
- GoogleResponse googleResponse = new GoogleResponse();
- parseResponse(response, googleResponse);
- return googleResponse;
- }
-
- /**
- * Get recognized data from a FLAC file.
- *
- * @param flacFile FLAC file to recognize
- * @param maxResults the maximum number of results to return in the response
- * @param samepleRate The sampleRate of the file. Default is 8000.
- * @return Returns a GoogleResponse, with the response and confidence score
- * @throws IOException Throws exception if something goes wrong
- */
- public GoogleResponse getRecognizedDataForFlac(String flacFile, int maxResults, int sampleRate) throws IOException{
- return getRecognizedDataForFlac(new File(flacFile), maxResults, sampleRate);
- }
-
- /**
- * Get recognized data from a FLAC file.
- *
- * @param flacFile FLAC file to recognize
- * @param maxResults the maximum number of results to return in the response
- * @return Returns a GoogleResponse, with the response and confidence score
- * @throws IOException Throws exception if something goes wrong
- */
- public GoogleResponse getRecognizedDataForFlac(String flacFile, int maxResults) throws IOException {
- return getRecognizedDataForFlac(new File(flacFile), maxResults);
- }
-
- /**
- * Get recognized data from a Wave file. This method will encode the wave file to a FLAC.
- * This method will automatically set the language to en-US, or English
- *
- * @param waveFile Wave file to recognize
- * @return Returns a GoogleResponse, with the response and confidence score
- * @throws IOException Throws exception if something goes wrong
- */
- public GoogleResponse getRecognizedDataForWave(File waveFile) throws IOException {
- return getRecognizedDataForWave(waveFile, 1);
- }
-
- /**
- * Get recognized data from a Wave file. This method will encode the wave file to a FLAC.
- * This method will automatically set the language to en-US, or English
- *
- * @param waveFile Wave file to recognize
- * @return Returns a GoogleResponse, with the response and confidence score
- * @throws IOException Throws exception if something goes wrong
- */
- public GoogleResponse getRecognizedDataForWave(String waveFile) throws IOException {
- return getRecognizedDataForWave(waveFile, 1);
- }
-
- /**
- * Get recognized data from a FLAC file.
- * This method will automatically set the language to en-US, or English
- *
- * @param flacFile FLAC file to recognize
- * @return Returns a GoogleResponse, with the response and confidence score
- * @throws IOException Throws exception if something goes wrong
- */
- public GoogleResponse getRecognizedDataForFlac(File flacFile) throws IOException {
- return getRecognizedDataForFlac(flacFile, 1);
- }
-
- /**
- * Get recognized data from a FLAC file.
- * This method will automatically set the language to en-US, or English
- *
- * @param flacFile FLAC file to recognize
- * @return Returns a GoogleResponse, with the response and confidence score
- * @throws IOException Throws exception if something goes wrong
- */
- public GoogleResponse getRecognizedDataForFlac(String flacFile) throws IOException {
- return getRecognizedDataForFlac(flacFile, 1);
- }
-
- /**
- * Parses the raw response from Google
- *
- * @param rawResponse The raw, unparsed response from Google
- * @return Returns the parsed response in the form of a Google Response.
- */
- private void parseResponse(String rawResponse, GoogleResponse googleResponse) {
- if (rawResponse == null || !rawResponse.contains("utterance"))
- return;
-
- String array = StringUtil.substringBetween(rawResponse, "[", "]");
- String[] parts = array.split("}");
-
- boolean first = true;
- for( String s : parts ) {
- if( first ) {
- first = false;
- String utterancePart = s.split(",")[0];
- String confidencePart = s.split(",")[1];
-
- String utterance = utterancePart.split(":")[1];
- String confidence = confidencePart.split(":")[1];
-
- utterance = StringUtil.stripQuotes(utterance);
- confidence = StringUtil.stripQuotes(confidence);
-
- if( utterance.equals("null") ) {
- utterance = null;
- }
- if( confidence.equals("null") ) {
- confidence = null;
- }
-
- googleResponse.setResponse(utterance);
- googleResponse.setConfidence(confidence);
- } else {
- String utterance = s.split(":")[1];
- utterance = StringUtil.stripQuotes(utterance);
- if( utterance.equals("null") ) {
- utterance = null;
- }
- googleResponse.getOtherPossibleResponses().add(utterance);
- }
- }
- }
-
- /**
- * Performs the request to Google with a file
- * Request is buffered
- *
- * @param inputFile Input files to recognize
- * @return Returns the raw, unparsed response from Google
- * @throws IOException Throws exception if something went wrong
- */
- private String rawRequest(File inputFile, int maxResults, int sampleRate) throws IOException{
- URL url;
- URLConnection urlConn;
- OutputStream outputStream;
- BufferedReader br;
-
- StringBuilder sb = new StringBuilder(GOOGLE_RECOGNIZER_URL);
- if( language != null ) {
- sb.append("&lang=");
- sb.append(language);
- }
- else{
- sb.append("&lang=auto");
- }
- if( !profanityFilter ) {
- sb.append("&pfilter=0");
- }
- sb.append("&maxresults=");
- sb.append(maxResults);
-
- // URL of Remote Script.
- url = new URL(sb.toString());
-
-
- // Open New URL connection channel.
- urlConn = url.openConnection();
-
- // we want to do output.
- urlConn.setDoOutput(true);
-
- // No caching
- urlConn.setUseCaches(false);
-
- // Specify the header content type.
- urlConn.setRequestProperty("Content-Type", "audio/x-flac; rate=" + sampleRate);
-
- // Send POST output.
- outputStream = urlConn.getOutputStream();
-
-
- FileInputStream fileInputStream = new FileInputStream(inputFile);
-
- byte[] buffer = new byte[256];
-
- while ((fileInputStream.read(buffer, 0, 256)) != -1) {
- outputStream.write(buffer, 0, 256);
- }
-
- fileInputStream.close();
- outputStream.close();
-
- // Get response data.
- br = new BufferedReader(new InputStreamReader(urlConn.getInputStream(), Charset.forName("UTF-8")));
-
- String response = br.readLine();
-
- br.close();
-
- return response;
-
- }
-
-}
diff --git a/lib/java-speech-api-master/src/com/darkprograms/speech/recognizer/RecognizerChunked.java b/lib/java-speech-api-master/src/com/darkprograms/speech/recognizer/RecognizerChunked.java
deleted file mode 100755
index ab4a999d..00000000
--- a/lib/java-speech-api-master/src/com/darkprograms/speech/recognizer/RecognizerChunked.java
+++ /dev/null
@@ -1,282 +0,0 @@
-package com.darkprograms.speech.recognizer;
-
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.OutputStream;
-import java.net.HttpURLConnection;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.net.URLConnection;
-import java.nio.ByteBuffer;
-import java.nio.MappedByteBuffer;
-import java.nio.channels.FileChannel;
-import java.util.ArrayList;
-import java.util.List;
-
-import javax.net.ssl.HttpsURLConnection;
-import javax.xml.ws.http.HTTPException;
-
-import com.darkprograms.speech.util.StringUtil;
-
-/**
- * This class uses Google's V2 Hook. The class is returns a chunked respones so listeners must be used.
- * The class also requires an API-Key (see Constructor) for details. This class is experimental and
- * subject to change as we restructure the API.
- * @author Aaron Gokaslan (Skylion)
- */
-public class RecognizerChunked {
-
- /**
- * Google's API V2 URL
- */
- private static final String GOOGLE_SPEECH_URL_V2 = "https://www.google.com/speech-api/v2/recognize";
-
- /**
- * API-Key used for requests
- */
- private final String API_KEY;
-
- /**
- * The language code Google uses to determine the language
- * Default value is "auto"
- */
- private String language;
-
- /**
- * Stores the Response Listeners
- */
- private List responseListeners = new ArrayList();
-
- /**
- * Constructor
- * @param API_KEY The API-Key for Google's Speech API. An API key can be obtained by requesting
- * one by following the process shown at this
- * url.
- */
- public RecognizerChunked(String API_KEY){
- this.API_KEY = API_KEY;
- this.language = "auto";
- }
-
- /**
- * Constructor
- * @param API_KEY The API-Key for Google's Speech API. An API key can be obtained by requesting
- * one by following the process shown at this
- * url.
- * @param language The language you want to use (Iso code)
- * Note: This function will most likely be deprecated.
- */
- public RecognizerChunked(String API_KEY, String language){
- this(API_KEY);
- this.language = language;
- }
-
- /**
- * The current language the Recognizer is set to use. Returns the ISO-Code otherwise,
- * it may return "auto."
- * @return The ISO-Code or auto if the language the is not specified.
- */
- public String getLanguage(){
- return language;
- }
-
- /**
- * Sets the language that the file should return.
- * @param language The language as an ISO-Code
- */
- public void setLanguage(String language){
- this.language = language;
- }
-
- /**
- * Analyzes the file for speech
- * @param infile The file you want to analyze for speech.
- * @param sampleRate The sample rate of the audioFile.
- * @throws IOException if something goes wrong reading the file.
- */
- public void getRecognizedDataForFlac(File infile, int sampleRate) throws IOException{
- byte[] data = mapFileIn(infile);
- getRecognizedDataForFlac(data, sampleRate);
- }
-
- /**
- * Analyzes the file for speech
- * @param infile The file you want to analyze for speech.
- * @param sampleRate The sample rate of the audioFile.
- * @throws IOException if something goes wrong reading the file.
- */
- public void getRecognizedDataForFlac(String inFile, int sampleRate) throws IOException{
- getRecognizedDataForFlac(new File(inFile), sampleRate);
- }
-
- /**
- * Recognizes the byte data.
- * @param data
- * @param sampleRate
- */
- public void getRecognizedDataForFlac(byte[] data, int sampleRate){
- StringBuilder sb = new StringBuilder(GOOGLE_SPEECH_URL_V2);
- sb.append("?output=json");
- sb.append("&client=chromium");
- sb.append("&lang=" + language);
- sb.append("&key=" + API_KEY);
- String url = sb.toString();
-
- openHttpsPostConnection(url, data, sampleRate);
- }
-
- /**
- * Opens a chunked response HTTPS line to the specified URL
- * @param urlStr The URL string to connect for chunking
- * @param data The data you want to send to Google. Speech files under 15 seconds long recommended.
- * @param sampleRate The sample rate for your audio file.
- */
- private void openHttpsPostConnection(final String urlStr, final byte[] data, final int sampleRate) {
- new Thread () {
- public void run() {
- HttpsURLConnection httpConn = null;
- ByteBuffer buff = ByteBuffer.wrap(data);
- byte[] destdata = new byte[2048];
- int resCode = -1;
- OutputStream out = null;
- try {
- URL url = new URL(urlStr);
- URLConnection urlConn = url.openConnection();
- if (!(urlConn instanceof HttpsURLConnection)) {
- throw new IOException ("URL must be HTTPS");
- }
- httpConn = (HttpsURLConnection)urlConn;
- httpConn.setAllowUserInteraction(false);
- httpConn.setInstanceFollowRedirects(true);
- httpConn.setRequestMethod("POST");
- httpConn.setDoOutput(true);
- httpConn.setChunkedStreamingMode(0); //TransferType: chunked
- httpConn.setRequestProperty("Content-Type", "audio/x-flac; rate=" + sampleRate);
- // this opens a connection, then sends POST & headers.
- out = httpConn.getOutputStream();
- //beyond 15 sec duration just simply writing the file
- // does not seem to work. So buffer it and delay to simulate
- // bufferd microphone delivering stream of speech
- // re: net.http.ChunkedOutputStream.java
- while(buff.remaining() >= destdata.length){
- buff.get(destdata);
- out.write(destdata);
- };
- byte[] lastr = new byte[buff.remaining()];
- buff.get(lastr, 0, lastr.length);
- out.write(lastr);
- out.close();
- resCode = httpConn.getResponseCode();
- if(resCode >= HttpURLConnection.HTTP_UNAUTHORIZED){//Stops here if Google doesn't like us/
- throw new HTTPException(HttpURLConnection.HTTP_UNAUTHORIZED);//Throws
- }
- String line;//Each line that is read back from Google.
- BufferedReader br = new BufferedReader(new InputStreamReader(httpConn.getInputStream()));
- while ((line = br.readLine( )) != null) {
- if(line.length()>19 && resCode > 100 && resCode < HttpURLConnection.HTTP_UNAUTHORIZED){
- GoogleResponse gr = new GoogleResponse();
- parseResponse(line, gr);
- fireResponseEvent(gr);
- }
- }
- } catch (MalformedURLException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- }
- finally {httpConn.disconnect();}
- }
- }.start();
- }
-
- /**
- * Converts the file into a byte[].
- * @param infile The File you want to specify
- * @return a byte array
- * @throws IOException if something goes wrong reading the file.
- */
- private byte[] mapFileIn(File infile) throws IOException{
- FileInputStream fis = new FileInputStream(infile);
- try{
- FileChannel fc = fis.getChannel(); // Get the file's size and then map it into memory
- int sz = (int)fc.size();
- MappedByteBuffer bb = fc.map(FileChannel.MapMode.READ_ONLY, 0, sz);
- byte[] data2 = new byte[bb.remaining()];
- bb.get(data2);
- return data2;
- }
- finally{//Ensures resources are closed regardless of whether the action suceeded
- fis.close();
- }
- }
-
- /**
- * Parses the response into a Google Response
- * @param rawResponse The raw String you want to parse
- * @param gr The GoogleResponse you want to parse into ti.
- */
- private void parseResponse(String rawResponse, GoogleResponse gr){
- if(rawResponse == null || !rawResponse.contains("\"result\"")){ return; }
- if(rawResponse.contains("\"confidence\":")){
- String confidence = StringUtil.substringBetween(rawResponse, "\"confidence\":", "}");
- gr.setConfidence(confidence);
- }
- else{
- gr.setConfidence(String.valueOf(1d));
- }
- String array = StringUtil.trimString(rawResponse, "[", "]");
- if(array.contains("[")){
- array = StringUtil.trimString(array, "[", "]");
- }
- String[] parts = array.split(",");
- gr.setResponse(parseTranscript(parts[0]));
- for(int i = 1; i100){
- List fragments = parseString(synthText);//parses String if too long
- String tmp = getLanguage();
- setLanguage(languageCode);//Keeps it from autodetecting each fragment.
- InputStream out = getMP3Data(fragments);
- setLanguage(tmp);//Reverts it to it's previous Language such as auto.
- return out;
- }
-
-
- String encoded = URLEncoder.encode(synthText, "UTF-8"); //Encode
-
- URL url = new URL(GOOGLE_SYNTHESISER_URL + languageCode + "&q=" + encoded); //create url
-
- // Open New URL connection channel.
- URLConnection urlConn = url.openConnection(); //Open connection
-
- urlConn.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0) Gecko/20100101 Firefox/4.0"); //Adding header for user agent is required
-
- return urlConn.getInputStream();
- }
-
- /**
- * Gets an InputStream to MP3Data for the returned information from a request
- * @param synthText List of Strings you want to be synthesized into MP3 data
- * @return Returns an input stream of all the MP3 data that is returned from Google
- * @throws IOException Throws exception if it cannot complete the request
- */
- public InputStream getMP3Data(List synthText) throws IOException{
- //Uses an executor service pool for concurrency. Limit to 1000 threads max.
- ExecutorService pool = Executors.newFixedThreadPool(1000);
- //Stores the Future (Data that will be returned in the future)
- Set> set = new LinkedHashSet>(synthText.size());
- for(String part: synthText){ //Iterates through the list
- Callable callable = new MP3DataFetcher(part);//Creates Callable
- Future future = pool.submit(callable);//Begins to run Callable
- set.add(future);//Adds the response that will be returned to a set.
- }
- List inputStreams = new ArrayList(set.size());
- for(Future future: set){
- try {
- inputStreams.add(future.get());//Gets the returned data from the future.
- } catch (ExecutionException e) {//Thrown if the MP3DataFetcher encountered an error.
- Throwable ex = e.getCause();
- if(ex instanceof IOException){
- throw (IOException)ex;//Downcasts and rethrows it.
- }
- } catch (InterruptedException e){//Will probably never be called, but just in case...
- Thread.currentThread().interrupt();//Interrupts the thread since something went wrong.
- }
- }
- return new SequenceInputStream(Collections.enumeration(inputStreams));//Sequences the stream.
- }
-
- /**
- * Separates a string into smaller parts so that Google will not reject the request.
- * @param input The string you want to separate
- * @return A List of the String fragments from your input..
- */
- private List parseString(String input){
- return parseString (input, new ArrayList());
- }
-
- /**
- * Separates a string into smaller parts so that Google will not reject the request.
- * @param input The string you want to break up into smaller parts
- * @param fragments List that you want to add stuff too.
- * If you don't have a List already constructed "new ArrayList()" works well.
- * @return A list of the fragments of the original String
- */
- private List parseString(String input, List fragments){
- if(input.length()<=100){//Base Case
- fragments.add(input);
- return fragments;
- }
- else{
- int lastWord = findLastWord(input);//Checks if a space exists
- if(lastWord<=0){
- fragments.add(input.substring(0,100));//In case you sent gibberish to Google.
- return parseString(input.substring(100), fragments);
- }else{
- fragments.add(input.substring(0,lastWord));//Otherwise, adds the last word to the list for recursion.
- return parseString(input.substring(lastWord), fragments);
- }
- }
- }
-
- /**
- * Finds the last word in your String (before the index of 99) by searching for spaces and ending punctuation.
- * Will preferably parse on punctuation to alleviate mid-sentence pausing
- * @param input The String you want to search through.
- * @return The index of where the last word of the string ends before the index of 99.
- */
- private int findLastWord(String input){
- if(input.length()<100)
- return input.length();
- int space = -1;
- for(int i = 99; i>0; i--){
- char tmp = input.charAt(i);
- if(isEndingPunctuation(tmp)){
- return i+1;
- }
- if(space==-1 && tmp == ' '){
- space = i;
- }
- }
- if(space>0){
- return space;
- }
- return -1;
- }
-
- /**
- * Checks if char is an ending character
- * Ending punctuation for all languages according to Wikipedia (Except for Sanskrit non-unicode)
- * @param The char you want check
- * @return True if it is, false if not.
- */
- private boolean isEndingPunctuation(char input){
- return input == '.' || input == '!' || input == '?' || input == ';' || input == ':' || input == '|';
- }
-
- /**
- * Automatically determines the language of the original text
- * @param text represents the text you want to check the language of
- * @return the languageCode in ISO-639
- * @throws Exception if it cannot complete the request
- */
- public String detectLanguage(String text) throws IOException{
- return GoogleTranslate.detectLanguage(text);
- }
-
- /**
- * This class is a callable.
- * A callable is like a runnable except that it can return data and throw exceptions.
- * Useful when using futures. Dramatically improves the speed of execution.
- * @author Aaron Gokaslan (Skylion)
- */
- private class MP3DataFetcher implements Callable{
- private String synthText;
-
- public MP3DataFetcher(String synthText){
- this.synthText = synthText;
- }
-
- public InputStream call() throws IOException{
- return getMP3Data(synthText);
- }
- }
-
-}
-
diff --git a/lib/java-speech-api-master/src/com/darkprograms/speech/synthesiser/SynthesiserV2.java b/lib/java-speech-api-master/src/com/darkprograms/speech/synthesiser/SynthesiserV2.java
deleted file mode 100755
index db124123..00000000
--- a/lib/java-speech-api-master/src/com/darkprograms/speech/synthesiser/SynthesiserV2.java
+++ /dev/null
@@ -1,303 +0,0 @@
-package com.darkprograms.speech.synthesiser;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.SequenceInputStream;
-import java.net.URL;
-import java.net.URLConnection;
-import java.net.URLEncoder;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.LinkedHashSet;
-import java.util.List;
-import java.util.Set;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-
-import com.darkprograms.speech.translator.GoogleTranslate;
-
-
-/**
- * This class uses the V2 version of Google's Text to Speech API. While this class requires an API key,
- * the endpoint allows for additional specification of parameters including speed and pitch.
- * See the constructor for instructions regarding the API_Key.
- * @author Skylion (Aaron Gokaslan)
- */
-public class SynthesiserV2 {
-
- private static final String GOOGLE_SYNTHESISER_URL = "https://www.google.com/speech-api/v2/synthesize?enc=mpeg" +
- "&client=chromium";
-
- /**
- * API_KEY used for requests
- */
- private final String API_KEY;
-
- /**
- * language of the Text you want to translate
- */
- private String languageCode;
-
- /**
- * The pitch of the generated audio
- */
- private double pitch = 1.0;
-
- /**
- * The speed of the generated audio
- */
- private double speed = 1.0;
-
- /**
- * Constructor
- * @param API_KEY The API-Key for Google's Speech API. An API key can be obtained by requesting
- * one by following the process shown at this
- * url.
- */
- public SynthesiserV2(String API_KEY){
- this.API_KEY = API_KEY;
- }
-
- /**
- * Returns the current language code for the Synthesiser.
- * Example: English(Generic) = en, English (US) = en-US, English (UK) = en-GB. and Spanish = es;
- * @return the current language code parameter
- */
- public String getLanguage(){
- return languageCode;
- }
-
- /**
- * Note: set language to auto to enable automatic language detection.
- * Setting to null will also implement Google's automatic language detection
- * @param languageCode The language code you would like to modify languageCode to.
- */
- public void setLanguage(String languageCode){
- this.languageCode = languageCode;
- }
-
- /**
- * @return the pitch
- */
- public double getPitch() {
- return pitch;
- }
-
- /**
- * Sets the pitch of the audio.
- * Valid values range from 0 to 2 inclusive.
- * Values above 1 correspond to higher pitch, values below 1 correspond to lower pitch.
- * @param pitch the pitch to set
- */
- public void setPitch(double pitch) {
- this.pitch = pitch;
- }
-
- /**
- * @return the speed
- */
- public double getSpeed() {
- return speed;
- }
-
- /**
- * Sets the speed of audio.
- * Valid values range from 0 to 2 inclusive.
- * Values higher than one correspond to faster and vice versa.
- * @param speed the speed to set
- */
- public void setSpeed(double speed) {
- this.speed = speed;
- }
-
- /**
- * Gets an input stream to MP3 data for the returned information from a request
- *
- * @param synthText Text you want to be synthesized into MP3 data
- * @return Returns an input stream of the MP3 data that is returned from Google
- * @throws IOException Throws exception if it can not complete the request
- */
- public InputStream getMP3Data(String synthText) throws IOException{
-
- String languageCode = this.languageCode;//Ensures retention of language settings if set to auto
-
- if(languageCode == null || languageCode.equals("") || languageCode.equalsIgnoreCase("auto")){
- try{
- languageCode = detectLanguage(synthText);//Detects language
- if(languageCode == null){
- languageCode = "en-us";//Reverts to Default Language if it can't detect it.
- }
- }
- catch(Exception ex){
- ex.printStackTrace();
- languageCode = "en-us";//Reverts to Default Language if it can't detect it.
- }
- }
-
- if(synthText.length()>100){
- List fragments = parseString(synthText);//parses String if too long
- String tmp = getLanguage();
- setLanguage(languageCode);//Keeps it from autodetecting each fragment.
- InputStream out = getMP3Data(fragments);
- setLanguage(tmp);//Reverts it to it's previous Language such as auto.
- return out;
- }
-
-
- String encoded = URLEncoder.encode(synthText, "UTF-8"); //Encode
-
- StringBuilder sb = new StringBuilder(GOOGLE_SYNTHESISER_URL);
- sb.append("&key=" + API_KEY);
- sb.append("&text=" + encoded);
- sb.append("&lang=" + languageCode);
-
- if(speed>=0 && speed<=2.0){
- sb.append("&speed=" + speed/2.0);
- }
-
- if(pitch>=0 && pitch<=2.0){
- sb.append("&pitch=" + pitch/2.0);
- }
-
- URL url = new URL(sb.toString()); //create url
-
- // Open New URL connection channel.
- URLConnection urlConn = url.openConnection(); //Open connection
-
- urlConn.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0) Gecko/20100101 Firefox/4.0"); //Adding header for user agent is required
-
- return urlConn.getInputStream();
- }
-
- /**
- * Gets an InputStream to MP3Data for the returned information from a request
- * @param synthText List of Strings you want to be synthesized into MP3 data
- * @return Returns an input stream of all the MP3 data that is returned from Google
- * @throws IOException Throws exception if it cannot complete the request
- */
- public InputStream getMP3Data(List synthText) throws IOException{
- //Uses an executor service pool for concurrency. Limit to 1000 threads max.
- ExecutorService pool = Executors.newFixedThreadPool(1000);
- //Stores the Future (Data that will be returned in the future)
- Set> set = new LinkedHashSet>(synthText.size());
- for(String part: synthText){ //Iterates through the list
- Callable callable = new MP3DataFetcher(part);//Creates Callable
- Future future = pool.submit(callable);//Begins to run Callable
- set.add(future);//Adds the response that will be returned to a set.
- }
- List inputStreams = new ArrayList(set.size());
- for(Future future: set){
- try {
- inputStreams.add(future.get());//Gets the returned data from the future.
- } catch (ExecutionException e) {//Thrown if the MP3DataFetcher encountered an error.
- Throwable ex = e.getCause();
- if(ex instanceof IOException){
- throw (IOException)ex;//Downcasts and rethrows it.
- }
- } catch (InterruptedException e){//Will probably never be called, but just in case...
- Thread.currentThread().interrupt();//Interrupts the thread since something went wrong.
- }
- }
- return new SequenceInputStream(Collections.enumeration(inputStreams));//Sequences the stream.
- }
-
- /**
- * Separates a string into smaller parts so that Google will not reject the request.
- * @param input The string you want to separate
- * @return A List of the String fragments from your input..
- */
- private List parseString(String input){
- return parseString (input, new ArrayList());
- }
-
- /**
- * Separates a string into smaller parts so that Google will not reject the request.
- * @param input The string you want to break up into smaller parts
- * @param fragments List that you want to add stuff too.
- * If you don't have a List already constructed "new ArrayList()" works well.
- * @return A list of the fragments of the original String
- */
- private List parseString(String input, List fragments){
- if(input.length()<=100){//Base Case
- fragments.add(input);
- return fragments;
- }
- else{
- int lastWord = findLastWord(input);//Checks if a space exists
- if(lastWord<=0){
- fragments.add(input.substring(0,100));//In case you sent gibberish to Google.
- return parseString(input.substring(100), fragments);
- }else{
- fragments.add(input.substring(0,lastWord));//Otherwise, adds the last word to the list for recursion.
- return parseString(input.substring(lastWord), fragments);
- }
- }
- }
-
- /**
- * Finds the last word in your String (before the index of 99) by searching for spaces and ending punctuation.
- * Will preferably parse on punctuation to alleviate mid-sentence pausing
- * @param input The String you want to search through.
- * @return The index of where the last word of the string ends before the index of 99.
- */
- private int findLastWord(String input){
- if(input.length()<100)
- return input.length();
- int space = -1;
- for(int i = 99; i>0; i--){
- char tmp = input.charAt(i);
- if(isEndingPunctuation(tmp)){
- return i+1;
- }
- if(space==-1 && tmp == ' '){
- space = i;
- }
- }
- if(space>0){
- return space;
- }
- return -1;
- }
-
- /**
- * Checks if char is an ending character
- * Ending punctuation for all languages according to Wikipedia (Except for Sanskrit non-unicode)
- * @param The char you want check
- * @return True if it is, false if not.
- */
- private boolean isEndingPunctuation(char input){
- return input == '.' || input == '!' || input == '?' || input == ';' || input == ':' || input == '|';
- }
-
- /**
- * Automatically determines the language of the original text
- * @param text represents the text you want to check the language of
- * @return the languageCode in ISO-639
- * @throws Exception if it cannot complete the request
- */
- public String detectLanguage(String text) throws IOException{
- return GoogleTranslate.detectLanguage(text);
- }
-
- /**
- * This class is a callable.
- * A callable is like a runnable except that it can return data and throw exceptions.
- * Useful when using futures. Dramatically improves the speed of execution.
- * @author Aaron Gokaslan (Skylion)
- */
- private class MP3DataFetcher implements Callable{
- private String synthText;
-
- public MP3DataFetcher(String synthText){
- this.synthText = synthText;
- }
-
- public InputStream call() throws IOException{
- return getMP3Data(synthText);
- }
- }
-
-}
\ No newline at end of file
diff --git a/lib/java-speech-api-master/src/com/darkprograms/speech/translator/GoogleTranslate.java b/lib/java-speech-api-master/src/com/darkprograms/speech/translator/GoogleTranslate.java
deleted file mode 100755
index 4572641d..00000000
--- a/lib/java-speech-api-master/src/com/darkprograms/speech/translator/GoogleTranslate.java
+++ /dev/null
@@ -1,168 +0,0 @@
-package com.darkprograms.speech.translator;
-
-import java.io.IOException;
-import java.io.Reader;
-import java.net.URL;
-import java.net.URLConnection;
-import java.net.URLEncoder;
-import java.nio.charset.Charset;
-import java.util.Locale;
-
-/***************************************************************************************************************
- * An API for a Google Translation service in Java.
- * Please Note: This API is unofficial and is not supported by Google. Subject to breakage at any time.
- * The translator allows for language detection and translation.
- * Recommended for translation of user interfaces or speech commands.
- * All translation services provided via Google Translate
- * @author Aaron Gokaslan (Skylion)
- ***************************************************************************************************************/
-public final class GoogleTranslate { //Class marked as final since all methods are static
-
- /**
- * URL to query for Translation
- */
- private final static String GOOGLE_TRANSLATE_URL = "http://translate.google.com/translate_a/t?client=t";
-
- /**
- * Private to prevent instantiation
- */
- private GoogleTranslate(){};
-
- /**
- * Converts the ISO-639 code into a friendly language code in the user's default language
- * For example, if the language is English and the default locale is French, it will return "anglais"
- * Useful for UI Strings
- * @param languageCode The ISO639-1
- * @return The language in the user's default language
- * @see {@link #detectLanguage}
- */
- public static String getDisplayLanguage(String languageCode){
- return (new Locale(languageCode)).getDisplayLanguage();
- }
-
- /**
- * Automatically determines the language of the original text
- * @param text represents the text you want to check the language of
- * @return The ISO-639 code for the language
- * @throws IOException if it cannot complete the request
- */
- public static String detectLanguage(String text) throws IOException{
- String encoded = URLEncoder.encode(text, "UTF-8"); //Encodes the string
- URL url = new URL(GOOGLE_TRANSLATE_URL + "&text=" + encoded); //Generates URL
- String rawData = urlToText(url);//Gets text from Google
- return findLanguage(rawData);
- }
-
-
- /**
- * Automatically translates text to a system's default language according to its locale
- * Useful for creating international applications as you can translate UI strings
- * @param text The text you want to translate
- * @return The translated text
- * @throws IOException if cannot complete request
- */
- public static String translate(String text) throws IOException{
- return translate(Locale.getDefault().getLanguage(), text);
- }
-
- /**
- * Automatically detects language and translate to the targetLanguage
- * @param targetLanguage The language you want to translate into in ISO-639 format
- * @param text The text you actually want to translate
- * @return The translated text.
- * @throws IOException if it cannot complete the request
- */
- public static String translate(String targetLanguage, String text) throws IOException{
- return translate("auto",targetLanguage, text);
- }
-
- /**
- * Translate text from sourceLanguage to targetLanguage
- * Specifying the sourceLanguage greatly improves accuracy over short Strings
- * @param sourceLanguage The language you want to translate from in ISO-639 format
- * @param targetLanguage The language you want to translate into in ISO-639 format
- * @param text The text you actually want to translate
- * @return the translated text.
- * @throws IOException if it cannot complete the request
- */
- public static String translate(String sourceLanguage, String targetLanguage, String text) throws IOException{
- String encoded = URLEncoder.encode(text, "UTF-8"); //Encode
- //Generates URL
- URL url = new URL(GOOGLE_TRANSLATE_URL + "&sl=" + sourceLanguage + "&tl=" + targetLanguage + "&text=" + encoded);
- String rawData = urlToText(url);//Gets text from Google
- if(rawData==null){
- return null;
- }
- String[] raw = rawData.split("\"");//Parses the JSON
- if(raw.length<2){
- return null;
- }
- return raw[1];//Returns the translation
- }
-
- /**
- * Converts a URL to Text
- * @param url that you want to generate a String from
- * @return The generated String
- * @throws IOException if it cannot complete the request
- */
- private static String urlToText(URL url) throws IOException{
- URLConnection urlConn = url.openConnection(); //Open connection
- //Adding header for user agent is required. Otherwise, Google rejects the request
- urlConn.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0) Gecko/20100101 Firefox/4.0");
- Reader r = new java.io.InputStreamReader(urlConn.getInputStream(), Charset.forName("UTF-8"));//Gets Data Converts to string
- StringBuilder buf = new StringBuilder();
- while (true) {//Reads String from buffer
- int ch = r.read();
- if (ch < 0)
- break;
- buf.append((char) ch);
- }
- String str = buf.toString();
- return str;
- }
-
-
- /**
- * Searches RAWData for Language
- * @param RAWData the raw String directly from Google you want to search through
- * @return The language parsed from the rawData or en-US (English-United States) if Google cannot determine it.
- */
- private static String findLanguage(String rawData){
- for(int i = 0; i+50)
- return rawData.substring(i+2,i+2+lastQuote);
- }
- else{
- String possible = rawData.substring(i+2,i+4);
- if(containsLettersOnly(possible)){//Required due to Google's inconsistent formatting.
- return possible;
- }
- }
- }
- }
- return null;
- }
-
- /**
- * Checks if all characters in text are letters.
- * @param text The text you want to determine the validity of.
- * @return True if all characters are letter, otherwise false.
- */
- private static boolean containsLettersOnly(String text){
- for(int i = 0; i. All rights reserved.
-//
-//Redistribution and use in source and binary forms, with or without
-//modification, are permitted provided that the following conditions
-//are met:
-//1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-//THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
-//ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-//IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-//ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-//FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-//DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-//OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-//HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-//LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-//OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-//SUCH DAMAGE.
-//
-//Visit the ACME Labs Java page for up-to-date versions of this and other
-//fine Java utilities: http://www.acme.com/java/
-
-
-import java.io.*;
-import java.util.*;
-
-/// An OutputStream that implements HTTP/1.1 chunking.
-//
-//This class lets a Servlet send its response data as an HTTP/1.1 chunked
-//stream. Chunked streams are a way to send arbitrary-length data without
-//having to know beforehand how much you're going to send. They are
-//introduced by a "Transfer-Encoding: chunked" header, so you have to
-//set that header when you make one of these streams.
-//
-//Sample usage:
-//
-//res.setHeader( "Transfer-Encoding", "chunked" );
-//OutputStream out = res.getOutputStream();
-//ChunkedOutputStream chunkOut = new ChunkedOutputStream( out );
-//(write data to chunkOut instead of out)
-//(optionally set footers)
-//chunkOut.done();
-//
-//
-//Every time the stream gets flushed, a chunk is sent. When done()
-//is called, an empty chunk is sent, marking the end of the chunked
-//stream as per the chunking spec.
-//
-//Fetch the software.
-//Fetch the entire Acme package.
-
-public class ChunkedOutputStream extends BufferedOutputStream
-{
-
- /// Make a ChunkedOutputStream with a default buffer size.
- // @param out the underlying output stream
- public ChunkedOutputStream( OutputStream out )
- {
- super( out );
- }
-
- /// Make a ChunkedOutputStream with a specified buffer size.
- // @param out the underlying output stream
- // @param size the buffer size
- public ChunkedOutputStream( OutputStream out, int size )
- {
- super( out, size );
- }
-
-
- /// Flush the stream. This will write any buffered output
- // bytes as a chunk.
- // @exception IOException if an I/O error occurred
- public synchronized void flush() throws IOException
- {
- if ( count != 0 )
- {
- writeBuf( buf, 0, count );
- count = 0;
- }
- }
-
-
- private Vector footerNames = new Vector();
- private Vector footerValues = new Vector();
-
- /// Set a footer. Footers are much like HTTP headers, except that
- // they come at the end of the data instead of at the beginning.
- public void setFooter( String name, String value )
- {
- footerNames.addElement( name );
- footerValues.addElement( value );
- }
-
-
- /// Indicate the end of the chunked data by sending a zero-length chunk,
- // possible including footers.
- // @exception IOException if an I/O error occurred
- public void done() throws IOException
- {
- flush();
- PrintStream pout = new PrintStream( out );
- pout.println( "0" );
- if ( footerNames.size() > 0 )
- {
- // Send footers.
- for ( int i = 0; i < footerNames.size(); ++i )
- {
- String name = (String) footerNames.elementAt( i );
- String value = (String) footerValues.elementAt( i );
- pout.println( name + ": " + value );
- }
- }
- footerNames = null;
- footerValues = null;
- pout.println( "" );
- pout.flush();
- }
-
-
- /// Make sure that calling close() terminates the chunked stream.
- public void close() throws IOException
- {
- if ( footerNames != null )
- done();
- super.close();
- }
-
-
- /// Write a sub-array of bytes.
- //
- // The only reason we have to override the BufferedOutputStream version
- // of this is that it writes the array directly to the output stream
- // if doesn't fit in the buffer. So we make it use our own chunk-write
- // routine instead. Otherwise this is identical to the parent-class
- // version.
- // @param b the data to be written
- // @param off the start offset in the data
- // @param len the number of bytes that are written
- // @exception IOException if an I/O error occurred
- public synchronized void write( byte b[], int off, int len ) throws IOException
- {
- int avail = buf.length - count;
-
- if ( len <= avail )
- {
- System.arraycopy( b, off, buf, count, len );
- count += len;
- return;
- }
- flush();
- writeBuf( b, off, len );
- }
-
-
- private static final byte[] crlf = { 13, 10 };
- private byte[] lenBytes = new byte[20]; // big enough for any number in hex
-
- /// The only routine that actually writes to the output stream.
- // This is where chunking semantics are implemented.
- // @exception IOException if an I/O error occurred
- private void writeBuf( byte b[], int off, int len ) throws IOException
- {
- // Write the chunk length as a hex number.
- String lenStr = Integer.toString( len, 16 );
- lenStr.getBytes( 0, lenStr.length(), lenBytes, 0 );
- out.write( lenBytes );
- // Write a CRLF.
- out.write( crlf );
- // Write the data.
- if ( len != 0 )
- out.write( b, off, len );
- // Write a CRLF.
- out.write( crlf );
- // And flush the real stream.
- out.flush();
- }
-
-}
-
diff --git a/lib/java-speech-api-master/src/com/darkprograms/speech/util/Complex.java b/lib/java-speech-api-master/src/com/darkprograms/speech/util/Complex.java
deleted file mode 100755
index 5177eafe..00000000
--- a/lib/java-speech-api-master/src/com/darkprograms/speech/util/Complex.java
+++ /dev/null
@@ -1,120 +0,0 @@
-package com.darkprograms.speech.util;
-
-
-/*************************************************************************
- * Compilation: javac Complex.java
- * Execution: java Complex
- *
- * Data type for complex numbers.
- *
- * The data type is "immutable" so once you create and initialize
- * a Complex object, you cannot change it. The "final" keyword
- * when declaring re and im enforces this rule, making it a
- * compile-time error to change the .re or .im fields after
- * they've been initialized.
- *
- * Class based off of Princeton University's Complex.java class
- * @author Aaron Gokaslan, Princeton University
- *************************************************************************/
-
-public class Complex {
- private final double re; // the real part
- private final double im; // the imaginary part
-
- // create a new object with the given real and imaginary parts
- public Complex(double real, double imag) {
- re = real;
- im = imag;
- }
-
- // return a string representation of the invoking Complex object
- public String toString() {
- if (im == 0) return re + "";
- if (re == 0) return im + "i";
- if (im < 0) return re + " - " + (-im) + "i";
- return re + " + " + im + "i";
- }
-
- // return abs/modulus/magnitude and angle/phase/argument
- public double abs() { return Math.hypot(re, im); } // Math.sqrt(re*re + im*im)
- public double phase() { return Math.atan2(im, re); } // between -pi and pi
-
- // return a new Complex object whose value is (this + b)
- public Complex plus(Complex b) {
- Complex a = this; // invoking object
- double real = a.re + b.re;
- double imag = a.im + b.im;
- return new Complex(real, imag);
- }
-
- // return a new Complex object whose value is (this - b)
- public Complex minus(Complex b) {
- Complex a = this;
- double real = a.re - b.re;
- double imag = a.im - b.im;
- return new Complex(real, imag);
- }
-
- // return a new Complex object whose value is (this * b)
- public Complex times(Complex b) {
- Complex a = this;
- double real = a.re * b.re - a.im * b.im;
- double imag = a.re * b.im + a.im * b.re;
- return new Complex(real, imag);
- }
-
- // scalar multiplication
- // return a new object whose value is (this * alpha)
- public Complex times(double alpha) {
- return new Complex(alpha * re, alpha * im);
- }
-
- // return a new Complex object whose value is the conjugate of this
- public Complex conjugate() { return new Complex(re, -im); }
-
- // return a new Complex object whose value is the reciprocal of this
- public Complex reciprocal() {
- double scale = re*re + im*im;
- return new Complex(re / scale, -im / scale);
- }
-
- // return the real or imaginary part
- public double re() { return re; }
- public double im() { return im; }
-
- // return a / b
- public Complex divides(Complex b) {
- Complex a = this;
- return a.times(b.reciprocal());
- }
-
- // return a new Complex object whose value is the complex exponential of this
- public Complex exp() {
- return new Complex(Math.exp(re) * Math.cos(im), Math.exp(re) * Math.sin(im));
- }
-
- // return a new Complex object whose value is the complex sine of this
- public Complex sin() {
- return new Complex(Math.sin(re) * Math.cosh(im), Math.cos(re) * Math.sinh(im));
- }
-
- // return a new Complex object whose value is the complex cosine of this
- public Complex cos() {
- return new Complex(Math.cos(re) * Math.cosh(im), -Math.sin(re) * Math.sinh(im));
- }
-
- // return a new Complex object whose value is the complex tangent of this
- public Complex tan() {
- return sin().divides(cos());
- }
-
- // returns the magnitude of the imaginary number.
- public double getMagnitude(){
- return Math.sqrt(re*re+im*im);
- }
-
- public boolean equals(Complex other){
- return (re==other.re) && (im==other.im);
- }
-
-}
diff --git a/lib/java-speech-api-master/src/com/darkprograms/speech/util/FFT.java b/lib/java-speech-api-master/src/com/darkprograms/speech/util/FFT.java
deleted file mode 100755
index 5ceb4797..00000000
--- a/lib/java-speech-api-master/src/com/darkprograms/speech/util/FFT.java
+++ /dev/null
@@ -1,133 +0,0 @@
-package com.darkprograms.speech.util;
-
-
-/*************************************************************************
- * Compilation: javac FFT.java
- * Execution: java FFT N
- * Dependencies: Complex.java
- *
- * Compute the FFT and inverse FFT of a length N complex sequence.
- * Bare bones implementation that runs in O(N log N) time. Our goal
- * is to optimize the clarity of the code, rather than performance.
- *
- * Limitations
- * -----------
- * - assumes N is a power of 2
- *
- * - not the most memory efficient algorithm (because it uses
- * an object type for representing complex numbers and because
- * it re-allocates memory for the subarray, instead of doing
- * in-place or reusing a single temporary array)
- *
- *************************************************************************/
-
-/*************************************************************************
- * @author Skylion implementation
- * @author Princeton University for the actual algorithm.
- ************************************************************************/
-
-public class FFT {
-
- // compute the FFT of x[], assuming its length is a power of 2
- public static Complex[] fft(Complex[] x) {
- int N = x.length;
-
- // base case
- if (N == 1) return new Complex[] { x[0] };
-
- // radix 2 Cooley-Tukey FFT
- if (N % 2 != 0) { throw new RuntimeException("N is not a power of 2"); }
-
- // fft of even terms
- Complex[] even = new Complex[N/2];
- for (int k = 0; k < N/2; k++) {
- even[k] = x[2*k];
- }
- Complex[] q = fft(even);
-
- // fft of odd terms
- Complex[] odd = even; // reuse the array
- for (int k = 0; k < N/2; k++) {
- odd[k] = x[2*k + 1];
- }
- Complex[] r = fft(odd);
-
- // combine
- Complex[] y = new Complex[N];
- for (int k = 0; k < N/2; k++) {
- double kth = -2 * k * Math.PI / N;
- Complex wk = new Complex(Math.cos(kth), Math.sin(kth));
- y[k] = q[k].plus(wk.times(r[k]));
- y[k + N/2] = q[k].minus(wk.times(r[k]));
- }
- return y;
- }
-
-
- // compute the inverse FFT of x[], assuming its length is a power of 2
- public static Complex[] ifft(Complex[] x) {
- int N = x.length;
- Complex[] y = new Complex[N];
-
- // take conjugate
- for (int i = 0; i < N; i++) {
- y[i] = x[i].conjugate();
- }
-
- // compute forward FFT
- y = fft(y);
-
- // take conjugate again
- for (int i = 0; i < N; i++) {
- y[i] = y[i].conjugate();
- }
-
- // divide by N
- for (int i = 0; i < N; i++) {
- y[i] = y[i].times(1.0 / N);
- }
-
- return y;
-
- }
-
- // compute the circular convolution of x and y
- public static Complex[] cconvolve(Complex[] x, Complex[] y) {
-
- // should probably pad x and y with 0s so that they have same length
- // and are powers of 2
- if (x.length != y.length) { throw new RuntimeException("Dimensions don't agree"); }
-
- int N = x.length;
-
- // compute FFT of each sequence
- Complex[] a = fft(x);
- Complex[] b = fft(y);
-
- // point-wise multiply
- Complex[] c = new Complex[N];
- for (int i = 0; i < N; i++) {
- c[i] = a[i].times(b[i]);
- }
-
- // compute inverse FFT
- return ifft(c);
- }
-
-
- // compute the linear convolution of x and y
- public static Complex[] convolve(Complex[] x, Complex[] y) {
- Complex ZERO = new Complex(0, 0);
-
- Complex[] a = new Complex[2*x.length];
- for (int i = 0; i < x.length; i++) a[i] = x[i];
- for (int i = x.length; i < 2*x.length; i++) a[i] = ZERO;
-
- Complex[] b = new Complex[2*y.length];
- for (int i = 0; i < y.length; i++) b[i] = y[i];
- for (int i = y.length; i < 2*y.length; i++) b[i] = ZERO;
-
- return cconvolve(a, b);
- }
-
-}
diff --git a/lib/java-speech-api-master/src/com/darkprograms/speech/util/StringUtil.java b/lib/java-speech-api-master/src/com/darkprograms/speech/util/StringUtil.java
deleted file mode 100755
index 2a062134..00000000
--- a/lib/java-speech-api-master/src/com/darkprograms/speech/util/StringUtil.java
+++ /dev/null
@@ -1,69 +0,0 @@
-package com.darkprograms.speech.util;
-
-/**
- * A string utility class for commonly used methods.
- * These methods are particularly useful for parsing.
- * @author Skylion
- */
-public class StringUtil {
-
- private StringUtil() {};//Prevents instantiation
-
- /**
- * Removes quotation marks from beginning and end of string.
- * @param s The string you want to remove the quotation marks from.
- * @return The modified String.
- */
- public static String stripQuotes(String s) {
- int start = 0;
- if( s.startsWith("\"") ) {
- start = 1;
- }
- int end = s.length();
- if( s.endsWith("\"") ) {
- end = s.length() - 1;
- }
- return s.substring(start, end);
- }
-
- /**
- * Returns the first instance of String found exclusively between part1 and part2.
- * @param s The String you want to substring.
- * @param part1 The beginning of the String you want to search for.
- * @param part2 The end of the String you want to search for.
- * @return The String between part1 and part2.
- * If the s does not contain part1 or part2, the method returns null.
- */
- public static String substringBetween(String s, String part1, String part2) {
- String sub = null;
-
- int i = s.indexOf(part1);
- int j = s.indexOf(part2, i + part1.length());
-
- if (i != -1 && j != -1) {
- int nStart = i + part1.length();
- sub = s.substring(nStart, j);
- }
-
- return sub;
- }
-
- /**
- * Gets the string exclusively between the first instance of part1 and the last instance of part2.
- * @param s The string you want to trim.
- * @param part1 The term to trim after first instance.
- * @param part2 The term to before last instance of.
- * @return The trimmed String
- */
- public static String trimString(String s, String part1, String part2){
- if(!s.contains(part1) || !s.contains(part2)){
- return null;
- }
- int first = s.indexOf(part1) + part1.length() + 1;
- String tmp = s.substring(first);
- int last = tmp.lastIndexOf(part2);
- tmp = tmp.substring(0, last);
- return tmp;
- }
-
-}
diff --git a/lib/javaFlacEncoder-0.3.1.jar b/lib/javaFlacEncoder-0.3.1.jar
deleted file mode 100755
index 636191c5..00000000
Binary files a/lib/javaFlacEncoder-0.3.1.jar and /dev/null differ
diff --git a/lib/sphinx4-5prealpha-src/README b/lib/sphinx4-5prealpha-src/README
deleted file mode 100755
index 62cdac8e..00000000
--- a/lib/sphinx4-5prealpha-src/README
+++ /dev/null
@@ -1,55 +0,0 @@
-Sphinx-4 Speech Recognition System
--------------------------------------------------------------------
-
-Sphinx-4 is a state-of-the-art, speaker-independent, continuous speech
-recognition system written entirely in the Java programming language. It
-was created via a joint collaboration between the Sphinx group at
-Carnegie Mellon University, Sun Microsystems Laboratories, Mitsubishi
-Electric Research Labs (MERL), and Hewlett Packard (HP), with
-contributions from the University of California at Santa Cruz (UCSC) and
-the Massachusetts Institute of Technology (MIT).
-
-The design of Sphinx-4 is based on patterns that have emerged from the
-design of past systems as well as new requirements based on areas that
-researchers currently want to explore. To exercise this framework, and
-to provide researchers with a "research-ready" system, Sphinx-4 also
-includes several implementations of both simple and state-of-the-art
-techniques. The framework and the implementations are all freely
-available via open source under a very generous BSD-style license.
-
-Because it is written entirely in the Java programming language, Sphinx-4
-can run on a variety of platforms without requiring any special
-compilation or changes. We've tested Sphinx-4 on the following platforms
-with success.
-
-To get started with sphinx4 visit our wiki
-
- http://cmusphinx.sourceforge.net/wiki
-
-Please give Sphinx-4 a try and post your questions, comments, and
-feedback to one of the CMU Sphinx Forums:
-
- http://sourceforge.net/p/cmusphinx/discussion/sphinx4
-
-We can also be reached at cmusphinx-devel@lists.sourceforge.net.
-
-Sincerely,
-
-The Sphinx-4 Team:
-(in alph. order)
-
-Evandro Gouvea, CMU (developer and speech advisor)
-Peter Gorniak, MIT (developer)
-Philip Kwok, Sun Labs (developer)
-Paul Lamere, Sun Labs (design/technical lead)
-Beth Logan, HP (speech advisor)
-Pedro Moreno, Google (speech advisor)
-Bhiksha Raj, MERL (design lead)
-Mosur Ravishankar, CMU (speech advisor)
-Bent Schmidt-Nielsen, MERL (speech advisor)
-Rita Singh, CMU/MIT (design/speech advisor)
-JM Van Thong, HP (speech advisor)
-Willie Walker, Sun Labs (overall lead)
-Manfred Warmuth, UCSC (speech advisor)
-Joe Woelfel, MERL (developer and speech advisor)
-Peter Wolf, MERL (developer and speech advisor)
diff --git a/lib/sphinx4-5prealpha-src/RELEASE_NOTES b/lib/sphinx4-5prealpha-src/RELEASE_NOTES
deleted file mode 100755
index bef52998..00000000
--- a/lib/sphinx4-5prealpha-src/RELEASE_NOTES
+++ /dev/null
@@ -1,193 +0,0 @@
-Sphinx-4 Speech Recognition System
-
--------------------------------------------------------------------
-
-Version: 1.0Beta6
-Release Date: March 2011
-
--------------------------------------------------------------------
-
-New Features and Improvements:
-
- * SRGS/GrXML support, more to come soon with support for JSAPI2
- * Model layout is unified with Pocketsphinx/Sphinxtrain
- * Netbeans project files are included
- * Language models can be loaded from URI
- * Batch testing application allows testing inside Sphinxtrain
-
-Bug Fixes:
-
- * Flat linguist accuracy issue fixed
- * Intelligent sorting in partitioner fixes stack overflow when tokens
- have identical scores
- * Various bug fixes
-
-Thanks:
-
- Timo Baumann, Nasir Hussain, Michele Alessandrini, Evandro Gouvea,
- Stephen Marquard, Larry A. Taylor, Yuri Orlov, Dirk Schnelle-Walka,
- James Chivers, Firas Al Khalil
-
--------------------------------------------------------------------
-
-Version: 1.0Beta5
-Release Date: August 2010
-
--------------------------------------------------------------------
-
-New Features and Improvements:
-
- * Alignment demo and grammar to align long speech recordings to
- transcription and get word times
- * Lattice grammar for multipass decoding
- * Explicit-backoff in LexTree linguist
- * Significant LVCSR speedup with proper LexTree compression
- * Simple filter to drop zero energy frames
- * Graphviz for grammar dump visualization instead of AISee
- * Voxforge decoding accuracy test
- * Lattice scoring speedup
- * JSAPI-free JSGF parser
-
-Bug Fixes:
-
- * Insertion probabilities are counted in lattice scores
- * Don't waste resources and memory on dummy acoustic model
- transformations
- * Small DMP files are loaded properly
- * JSGF parser fixes
- * Documentation improvements
- * Debian package stuff
-
-Thanks:
-
- Antoine Raux, Marek Lesiak, Yaniv Kunda, Brian Romanowski, Tony
- Robinson, Bhiksha Raj, Timo Baumann, Michele Alessandrini, Francisco
- Aguilera, Peter Wolf, David Huggins-Daines, Dirk Schnelle-Walka.
-
--------------------------------------------------------------------
-
-Version: 1.0Beta4
-Release Date: February 2010
-
--------------------------------------------------------------------
-
-New Features and Improvements:
-
- * Large arbitrary-order language models
- * Simplified and reworked model loading code
- * Raw configuration and demos
- * HTK model loader
- * A lot of code optimizations
- * JSAPI-independent JSGF parser
- * Noise filtering components
- * Lattice rescoring
- * Server-based language model
-
-Bug fixes:
-
- * Lots of bug fixes: PLP extraction, race-conditions
- in scoring, etc.
-
-Thanks:
-
- Peter Wolf, Yaniv Kunda, Antoine Raux, Dirk Schnelle-Walka,
- Yannick Estève, Anthony Rousseau and LIUM team, Christophe Cerisara.
-
--------------------------------------------------------------------
-
-Version: 1.0Beta3
-Release Date: August 2009
-
--------------------------------------------------------------------
-
-New Features and Improvements:
-
- * BatchAGC frontend component
- * Completed transition to defaults in annotations
- * ConcatFeatureExtractor to cooperate with cepwin models
- * End of stream signals are passed to the decoder to fix cancellation
- * Timer API improvement
- * Threading policy is changed to TAS
-
-Bug fixes:
-
- * Fixes reading UTF-8 from language model dump.
- * Huge memory optimization of the lattice compression
- * More stable frontend work with DataStart and DataEnd and optional
- SpeechStart/SpeechEnd
-
-Thanks:
-
- Yaniv Kunda, Michele Alessandrini, Holger Brandl, Timo Baumann,
- Evandro Gouvea
-
--------------------------------------------------------------------
-
-Version: 1.0Beta2
-Release Date: February 2009
-
--------------------------------------------------------------------
-
-New Features and Improvements:
-
- * new much cleaner and more robust configuration system
- * migrated to java5
- * xml-free instantiation of new systems
- * improved feature extraction (better voice activity detection, many bugfixes)
- * Cleaned up some of the core APIs
- * include-tag for configuration files
- * better JavaSound support
- * fully qualified grammar names in JSGF (Roger Toenz)
- * support for dictionary addenda in the FastDictionary (Gregg Liming)
- * added batch tools for measuring performance on NIST corpus with CTL files
- * many performance and stability improvements
-
-
--------------------------------------------------------------------
-
-Version: 1.0Beta
-Release Date: September 2004
-
--------------------------------------------------------------------
-
-New Features:
-
- * Confidence scoring
- * Posterior probability computation
- * Sausage creation from a lattice
- * Dynamic grammars
- * Narrow bandwidth acoustic model
- * Out-of-grammar utterance rejection
- * More demonstration programs
- * WSJ5K Language model
-
-Improvements:
-
- * Better control over microphone selection
- * JSGF limitations removed
- * Improved performance for large, perplex JSGF grammars
- * Added Filler support for JSGF Grammars
- * Ability to configure microphone input
- * Added ECMAScript Action Tags support and demos.
-
-Bug fixes:
-
- * Lots of bug fixes
-
-Documentation:
-
- * Added the Sphinx-4 FAQ
- * Added scripts and instructions for building a WSJ5k language model
- from LDC data.
-
-Thanks:
-
- * Peter Gorniak, Willie Walker, Philip Kwok, Paul Lamere
-
--------------------------------------------------------------------
-Version: 0.1alpha
-Release Date: June 2004
-
--------------------------------------------------------------------
-
-Initial release
diff --git a/lib/sphinx4-5prealpha-src/doc/Sphinx4_Whitepaper_2003.pdf b/lib/sphinx4-5prealpha-src/doc/Sphinx4_Whitepaper_2003.pdf
deleted file mode 100755
index 8f686666..00000000
Binary files a/lib/sphinx4-5prealpha-src/doc/Sphinx4_Whitepaper_2003.pdf and /dev/null differ
diff --git a/lib/sphinx4-5prealpha-src/doc/speaker_adaptation.txt b/lib/sphinx4-5prealpha-src/doc/speaker_adaptation.txt
deleted file mode 100755
index 89fdc0f1..00000000
--- a/lib/sphinx4-5prealpha-src/doc/speaker_adaptation.txt
+++ /dev/null
@@ -1,88 +0,0 @@
-Speaker Adaptation with MLLR Transformation
-
-Unsupervised speaker adaptation for Sphinx4
-
-There are two methods for building an improved acoustic model. One of them
-collects data from a speaker and trains the acoustic model set on it, so that
-recognition becomes more accurate by using the speaker's characteristics.
-The disadvantage of this method is that a large amount of data has to be
-collected to reach sufficient model accuracy.
-
-The other method, when the amount of data available is small from a new
-speaker, is to collect them and by using an adaptation technique to adapt the
-model set to better fit the speaker's characteristics.
-
-The adaptation technique used is MLLR (maximum likelihood linear regression)
-transform that is applied depending on the available data by generating one or
-more transformations that reduce the mismatch between
-an initial model set and the adaptation data. There is only one transformation
-when the amount of available data is too small and is called global adaptation
-transform. The global transform is applied to every Gaussian component in the
-model set. Otherwise, when the amount of adaptation data is large, the number
-of transformations is increasing and each transformation is applied to a
-certain cluster of Gaussian components.
-
-To be able to decode with an adapted model there are two important classes that
-should be imported:
-
-import edu.cmu.sphinx.decoder.adaptation.Stats;
-import edu.cmu.sphinx.decoder.adaptation.Transform;
-
-Stats Class estimates a MLLR transform for each cluster of data and the
-transform will be applied to the corresponding cluster. You can choose the
-number of clusters by giving the number as argument to
-createStats(nrOfClusters) in Stats method. The method will return an object
-that contains the loaded acoustic model and the number of clusters. It is
-important to collect counts from each Result object because the estimation
-of the MLLR transformation is based on them.
-
-Before starting to collect counts it is important to have all Gaussians clustered.
-So, createStats(nrOfClusters) will generate a ClusteredDensityFileData object
-to prepare the Gaussians. ClusteredDensityFileData class performs the clustering
-using the "k-means" clustering algorithm. The k-means clustering algorithm aims
-to partition the Gaussians into k clusters in which each Gaussian belongs
-to the cluster with the nearest mean. It is interesting to know that the problem
-of clustering is computationally difficult, so the heuristic used is the
-Euclidean criterion.
-
-The next step is to collect counts from each Result object and store them
-separately for each cluster. Here, the matrices regLs and regRs used in
-computing the transformation are filled. Transform class performs the actual
-transformation for each cluster. Given the counts previously gathered and the
-number of clusters, the class will compute the two matrices A (the
-transformation matrix) B (the bias vector) that are tied across the Gaussians
-from the corresponding cluster. A Transform object will contain all the
-transformations computed for an utterance. To use the adapted acoustic model it
-is necessary to update the Sphinx3Loader which is responsible for
-loading the files from the model. When updating occurs, the acoustic model is
-already loaded, so setTransform(transform) method will replace the old means
-with the new ones.
-
-Now, that we have the theoretical part, let’s see the practical part. Here is
-how you create and use a MLLR transformation:
-
-Stats stats = recognizer.createStats(1);
-recognizer.startRecognition(stream);
-while ((result = recognizer.getResult()) != null) {
- stats.collect(result);
-}
-recognizer.stopRecognition();
-
-// Transform represents the speech profile
-Transform transform = stats.createTransform();
-recognizer.setTransform(transform);
-
-After the transformation has been set on the StreamSpeechRecognizer object,
-the recognizer is ready to decode using the new means. The recognition
-process is the same as when decoding with the general acoustic model.
-Creating and setting a transformation is like creating a new acoustic
-model with the speaker's characteristics, so the accuracy
-will be better.
-
-For later decoding sessions you can store a speaker's transformation in a file
-by calling store("FilePath", 0) on the Transform object.
-
-If you already have a transformation (known as mllr_matrix) generated earlier
-with Sphinx4 or with another program, you can load the file by calling
-load("FilePath") on a Transform object and then set it on a Recognizer object.
-
diff --git a/lib/sphinx4-5prealpha-src/license.terms b/lib/sphinx4-5prealpha-src/license.terms
deleted file mode 100755
index b0affbf3..00000000
--- a/lib/sphinx4-5prealpha-src/license.terms
+++ /dev/null
@@ -1,40 +0,0 @@
-Copyright 1999-2015 Carnegie Mellon University.
-Portions Copyright 2002-2008 Sun Microsystems, Inc.
-Portions Copyright 2002-2008 Mitsubishi Electric Research Laboratories.
-Portions Copyright 2013-2015 Alpha Cephei, Inc.
-
-All Rights Reserved. Use is subject to license terms.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
-
-1. Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in
- the documentation and/or other materials provided with the
- distribution.
-
-3. Original authors' names are not deleted.
-
-4. The authors' names are not used to endorse or promote products
- derived from this software without specific prior written
- permission.
-
-This work was supported in part by funding from the Defense Advanced
-Research Projects Agency and the National Science Foundation of the
-United States of America, the CMU Sphinx Speech Consortium, and
-Sun Microsystems, Inc.
-
-CARNEGIE MELLON UNIVERSITY, SUN MICROSYSTEMS, INC., MITSUBISHI
-ELECTRONIC RESEARCH LABORATORIES AND THE CONTRIBUTORS TO THIS WORK
-DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
-CARNEGIE MELLON UNIVERSITY, SUN MICROSYSTEMS, INC., MITSUBISHI
-ELECTRONIC RESEARCH LABORATORIES NOR THE CONTRIBUTORS BE LIABLE FOR
-ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
-OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
diff --git a/lib/sphinx4-5prealpha-src/pom.xml b/lib/sphinx4-5prealpha-src/pom.xml
deleted file mode 100755
index 117116ad..00000000
--- a/lib/sphinx4-5prealpha-src/pom.xml
+++ /dev/null
@@ -1,88 +0,0 @@
-
- 4.0.0
-
-
- org.sonatype.oss
- oss-parent
- 7
-
-
- edu.cmu.sphinx
- sphinx4-parent
- 1.0-SNAPSHOT
- pom
-
- Sphinx4
- http://cmusphinx.sourceforge.net
-
-
- sphinx4-core
- sphinx4-data
- sphinx4-samples
-
-
-
-
- org.testng
- testng
- 6.8.8
- test
-
-
- org.hamcrest
- hamcrest-library
- 1.3
- test
-
-
-
-
- UTF-8
- svn.code.sf.net/p/cmusphinx/code/trunk/sphinx4
-
-
-
- scm:svn:http://${project.scm.root}
- scm:svn:svn+ssh://${project.scm.root}
- http://${project.scm.root}
-
-
-
-
-
- org.apache.maven.plugins
- maven-source-plugin
- 2.2.1
-
-
- attach-sources
- package
-
- jar
-
-
-
-
-
-
- org.apache.maven.plugins
- maven-javadoc-plugin
- 2.9.1
-
-
- attach-javadocs
- package
-
- jar
-
-
-
-
-
-
-
-
diff --git a/lib/sphinx4-5prealpha-src/sphinx4-core/pom.xml b/lib/sphinx4-5prealpha-src/sphinx4-core/pom.xml
deleted file mode 100755
index 57764718..00000000
--- a/lib/sphinx4-5prealpha-src/sphinx4-core/pom.xml
+++ /dev/null
@@ -1,34 +0,0 @@
-
- 4.0.0
-
-
- edu.cmu.sphinx
- sphinx4-parent
- 1.0-SNAPSHOT
-
-
- sphinx4-core
- jar
-
- Sphinx4 core
-
-
-
-
- org.apache.commons
- commons-math3
- 3.2
-
-
-
- edu.cmu.sphinx
- sphinx4-data
- 1.0-SNAPSHOT
- test
-
-
-
diff --git a/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/LongTextAligner.java b/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/LongTextAligner.java
deleted file mode 100755
index 6b2f31f2..00000000
--- a/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/LongTextAligner.java
+++ /dev/null
@@ -1,355 +0,0 @@
-/*
- * Copyright 2014 Alpha Cephei Inc.
- * All Rights Reserved. Use is subject to license terms.
- *
- * See the file "license.terms" for information on usage and
- * redistribution of this file, and for a DISCLAIMER OF ALL
- * WARRANTIES.
- *
- */
-
-package edu.cmu.sphinx.alignment;
-
-import static java.lang.Math.abs;
-import static java.lang.Math.max;
-import static java.lang.Math.min;
-import static java.util.Arrays.fill;
-import static java.util.Collections.emptyList;
-
-import java.util.*;
-
-import edu.cmu.sphinx.util.Range;
-import edu.cmu.sphinx.util.Utilities;
-
-/**
- * Aligns a sequence of query words with a reference ("database") sequence of words.
- * Both sequences are compared as tuples of {@code tupleSize} consecutive words; when
- * the query or the searched range is shorter than one tuple, a plain word-by-word
- * edit-distance alignment ({@code alignTextSimple}) is used instead.
- *
- * @author Alexander Solovets
- */
-public class LongTextAligner {
-
-    /**
-     * One alignment of a list of query tuples against the database tuples found inside
-     * a range. The alignment is computed in the constructor and read with
-     * {@link #getIndices()}.
-     */
-    private final class Alignment {
-
-        /**
-         * A cell of the search grid. {@code queryIndex} and {@code databaseIndex} are
-         * 1-based positions in {@code indices} and {@code shifts}; the value 0 marks
-         * the boundary row or column that precedes the first element.
-         */
-        public final class Node {
-
-            private final int databaseIndex;
-            private final int queryIndex;
-
-            private Node(int row, int column) {
-                this.databaseIndex = column;
-                this.queryIndex = row;
-            }
-
-            /**
-             * @return the database tuple offset stored in {@code shifts} for this node;
-             *         only valid when the node is not on the database boundary
-             */
-            public int getDatabaseIndex() {
-                return shifts.get(databaseIndex - 1);
-            }
-
-            /**
-             * @return the query tuple position stored in {@code indices} for this node;
-             *         only valid when the node is not on the query boundary
-             */
-            public int getQueryIndex() {
-                return indices.get(queryIndex - 1);
-            }
-
-            /** @return the query tuple of this node, or null on the query boundary */
-            public String getQueryWord() {
-                if (queryIndex > 0)
-                    return query.get(getQueryIndex());
-                return null;
-            }
-
-            /** @return the database tuple of this node, or null on the database boundary */
-            public String getDatabaseWord() {
-                if (databaseIndex > 0)
-                    return reftup.get(getDatabaseIndex());
-                return null;
-            }
-
-            /**
-             * @return the step cost of this node: the larger of the two indices on a
-             *         boundary, otherwise 0 when the tuples match and 1 when they differ
-             */
-            public int getValue() {
-                if (isBoundary())
-                    return max(queryIndex, databaseIndex);
-                return hasMatch() ? 0 : 1;
-            }
-
-            /**
-             * Must not be called on a query-boundary node, where the query word is null.
-             *
-             * @return true if the query tuple equals the database tuple
-             */
-            public boolean hasMatch() {
-                return getQueryWord().equals(getDatabaseWord());
-            }
-
-            public boolean isBoundary() {
-                return queryIndex == 0 || databaseIndex == 0;
-            }
-
-            /** @return true if this node is the last cell of both sequences */
-            public boolean isTarget() {
-                return queryIndex == indices.size() &&
-                       databaseIndex == shifts.size();
-            }
-
-            /**
-             * @return the neighbours reachable in one step: diagonal, next database
-             *         element, next query element (each only while inside the grid)
-             */
-            public List<Node> adjacent() {
-                List<Node> result = new ArrayList<Node>(3);
-                if (queryIndex < indices.size() &&
-                    databaseIndex < shifts.size()) {
-                    result.add(new Node(queryIndex + 1, databaseIndex + 1));
-                }
-                if (databaseIndex < shifts.size()) {
-                    result.add(new Node(queryIndex, databaseIndex + 1));
-                }
-                if (queryIndex < indices.size()) {
-                    result.add(new Node(queryIndex + 1, databaseIndex));
-                }
-
-                return result;
-            }
-
-            @Override
-            public boolean equals(Object object) {
-                if (!(object instanceof Node))
-                    return false;
-
-                Node other = (Node) object;
-                return queryIndex == other.queryIndex &&
-                       databaseIndex == other.databaseIndex;
-            }
-
-            @Override
-            public int hashCode() {
-                return 31 * (31 * queryIndex + databaseIndex);
-            }
-
-            @Override
-            public String toString() {
-                return String.format("[%d %d]", queryIndex, databaseIndex);
-            }
-        }
-
-        // Sorted database offsets of every query tuple that occurs inside the range.
-        private final List<Integer> shifts;
-        private final List<String> query;
-        // Positions of the query tuples that occur in the database at all.
-        private final List<Integer> indices;
-        private final List<Node> alignment;
-
-        /**
-         * Computes the alignment of the query tuples with the database tuples whose
-         * offsets lie inside the range.
-         *
-         * @param query list of query tuples
-         * @param range range of database offsets to consider
-         */
-        public Alignment(List<String> query, Range range) {
-            this.query = query;
-            indices = new ArrayList<Integer>();
-            Set<Integer> shiftSet = new TreeSet<Integer>();
-            for (int i = 0; i < query.size(); i++) {
-                if (tupleIndex.containsKey(query.get(i))) {
-                    indices.add(i);
-                    for (Integer shift : tupleIndex.get(query.get(i))) {
-                        if (range.contains(shift))
-                            shiftSet.add(shift);
-                    }
-                }
-            }
-
-            shifts = new ArrayList<Integer>(shiftSet);
-
-            // Search from the corner node [0 0] towards the target node, always
-            // expanding the open node with the lowest recorded cost.
-            final Map<Node, Integer> cost = new HashMap<Node, Integer>();
-            PriorityQueue<Node> openSet = new PriorityQueue<Node>(1, new Comparator<Node>() {
-                @Override
-                public int compare(Node o1, Node o2) {
-                    return cost.get(o1).compareTo(cost.get(o2));
-                }
-            });
-            Collection<Node> closedSet = new HashSet<Node>();
-            Map<Node, Node> parents = new HashMap<Node, Node>();
-
-            Node startNode = new Node(0, 0);
-            cost.put(startNode, 0);
-            openSet.add(startNode);
-
-            while (!openSet.isEmpty()) {
-                Node q = openSet.poll();
-                if (closedSet.contains(q))
-                    continue;
-
-                if (q.isTarget()) {
-                    // Walk the parent links back to the start, keeping matches only.
-                    List<Node> backtrace = new ArrayList<Node>();
-                    while (parents.containsKey(q)) {
-                        if (!q.isBoundary() && q.hasMatch())
-                            backtrace.add(q);
-                        q = parents.get(q);
-                    }
-                    alignment = new ArrayList<Node>(backtrace);
-                    Collections.reverse(alignment);
-                    return;
-                }
-
-                closedSet.add(q);
-                for (Node nb : q.adjacent()) {
-
-                    if (closedSet.contains(nb))
-                        continue;
-
-                    // FIXME: move to appropriate location
-                    // Change in distance from the grid diagonal when stepping q -> nb.
-                    int l = abs(indices.size() - shifts.size() - q.queryIndex +
-                                q.databaseIndex) -
-                            abs(indices.size() - shifts.size() -
-                                nb.queryIndex +
-                                nb.databaseIndex);
-
-                    Integer oldScore = cost.get(nb);
-                    Integer qScore = cost.get(q);
-                    if (oldScore == null)
-                        oldScore = Integer.MAX_VALUE;
-                    if (qScore == null)
-                        qScore = Integer.MAX_VALUE;
-
-                    int newScore = qScore + nb.getValue() - l;
-                    if (newScore < oldScore) {
-                        cost.put(nb, newScore);
-                        openSet.add(nb);
-                        parents.put(nb, q);
-                    }
-                }
-            }
-
-            alignment = emptyList();
-        }
-
-        /** @return the matching nodes in query order; empty if the target was not reached */
-        public List<Node> getIndices() {
-            return alignment;
-        }
-    }
-
-    private final int tupleSize;
-    // Database tuples, and for each tuple text the offsets at which it occurs.
-    private final List<String> reftup;
-    private final HashMap<String, ArrayList<Integer>> tupleIndex;
-    private final List<String> refWords;
-
-    /**
-     * Constructs new text aligner that serves requests for alignment of
-     * sequence of words with the provided database sequence. Sequences are
-     * aligned by tuples comprising one or more subsequent words.
-     *
-     * @param words list of words forming the database
-     * @param tupleSize size of a tuple, must be greater or equal to 1
-     */
-    public LongTextAligner(List<String> words, int tupleSize) {
-        assert words != null;
-        assert tupleSize > 0;
-
-        this.tupleSize = tupleSize;
-        this.refWords = words;
-
-        int offset = 0;
-        reftup = getTuples(words);
-
-        tupleIndex = new HashMap<String, ArrayList<Integer>>();
-        for (String tuple : reftup) {
-            ArrayList<Integer> indexes = tupleIndex.get(tuple);
-            if (indexes == null) {
-                indexes = new ArrayList<Integer>();
-                tupleIndex.put(tuple, indexes);
-            }
-            indexes.add(offset++);
-        }
-    }
-
-    /**
-     * Aligns query sequence with the previously built database.
-     * @param query list of words to look for
-     *
-     * @return indices of alignment
-     */
-    public int[] align(List<String> query) {
-        return align(query, new Range(0, refWords.size()));
-    }
-
-    /**
-     * Aligns query sequence with the previously built database.
-     * @param words list words to look for
-     * @param range range of database to look for alignment
-     *
-     * @return indices of alignment; one entry per query word, -1 where the word
-     *         was not aligned
-     */
-    public int[] align(List<String> words, Range range) {
-
-        // Too little material for a single tuple: align word by word instead.
-        if (range.upperEndpoint() - range.lowerEndpoint() < tupleSize || words.size() < tupleSize) {
-            return alignTextSimple(refWords.subList(range.lowerEndpoint(), range.upperEndpoint()), words, range.lowerEndpoint());
-        }
-
-        int[] result = new int[words.size()];
-        fill(result, -1);
-        int lastIndex = 0;
-        for (Alignment.Node node : new Alignment(getTuples(words), range)
-                .getIndices()) {
-            // Spread each matched tuple over the words it covers, without rewriting
-            // positions already filled by an earlier, overlapping tuple.
-            lastIndex = max(lastIndex, node.getQueryIndex());
-            for (; lastIndex < node.getQueryIndex() + tupleSize; ++lastIndex)
-                result[lastIndex] = node.getDatabaseIndex() + lastIndex -
-                                    node.getQueryIndex();
-        }
-        return result;
-    }
-
-    /**
-     * Makes list of tuples of the given size out of list of words.
-     *
-     * @param words words
-     * @return list of tuples of size {@link #tupleSize}; empty when there are fewer
-     *         than {@code tupleSize} words
-     */
-    private List<String> getTuples(List<String> words) {
-        List<String> result = new ArrayList<String>();
-        // Without this guard the priming loop below would run the iterator past the
-        // end of a short list and throw NoSuchElementException.
-        if (words.size() < tupleSize) {
-            return result;
-        }
-        LinkedList<String> tuple = new LinkedList<String>();
-
-        Iterator<String> it = words.iterator();
-        for (int i = 0; i < tupleSize - 1; i++) {
-            tuple.add(it.next());
-        }
-        while (it.hasNext()) {
-            tuple.addLast(it.next());
-            result.add(Utilities.join(tuple));
-            tuple.removeFirst();
-        }
-        return result;
-    }
-
-    /**
-     * Aligns the query with the database word by word using an edit-distance table.
-     *
-     * @param database reference words
-     * @param query words to align
-     * @param offset value added to every database position written to the result
-     * @return one entry per query word: the matched database position plus
-     *         {@code offset}, or -1 where the word was not matched
-     */
-    static int[] alignTextSimple(List<String> database, List<String> query,
-            int offset) {
-        int n = database.size() + 1;
-        int m = query.size() + 1;
-        int[][] f = new int[n][m];
-
-        f[0][0] = 0;
-        for (int i = 1; i < n; ++i) {
-            f[i][0] = i;
-        }
-
-        for (int j = 1; j < m; ++j) {
-            f[0][j] = j;
-        }
-
-        for (int i = 1; i < n; ++i) {
-            for (int j = 1; j < m; ++j) {
-                int match = f[i - 1][j - 1];
-                String refWord = database.get(i - 1);
-                String queryWord = query.get(j - 1);
-                if (!refWord.equals(queryWord)) {
-                    ++match;
-                }
-                int insert = f[i][j - 1] + 1;
-                int delete = f[i - 1][j] + 1;
-                f[i][j] = min(match, min(insert, delete));
-            }
-        }
-
-        // Trace back from the bottom-right corner of the table.
-        --n;
-        --m;
-        int[] alignment = new int[m];
-        Arrays.fill(alignment, -1);
-        while (m > 0) {
-            if (n == 0) {
-                --m;
-            } else {
-                String refWord = database.get(n - 1);
-                String queryWord = query.get(m - 1);
-                // NOTE(review): this condition used to start with
-                // f[n - 1][m - 1] <= f[n - 1][m - 1], which is always true and was
-                // dropped. It may have been meant to compare against f[n - 1][m];
-                // that would change which of several equal-cost matches is reported,
-                // so confirm before changing it.
-                if (f[n - 1][m - 1] <= f[n][m - 1]
-                        && refWord.equals(queryWord)) {
-                    alignment[--m] = --n + offset;
-                } else {
-                    if (f[n - 1][m] < f[n][m - 1]) {
-                        --n;
-                    } else {
-                        --m;
-                    }
-                }
-            }
-        }
-
-        return alignment;
-    }
-}
diff --git a/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/SimpleTokenizer.java b/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/SimpleTokenizer.java
deleted file mode 100755
index f0bfb654..00000000
--- a/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/SimpleTokenizer.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright 2014 Alpha Cephei Inc.
- * All Rights Reserved. Use is subject to license terms.
- *
- * See the file "license.terms" for information on usage and
- * redistribution of this file, and for a DISCLAIMER OF ALL
- * WARRANTIES.
- */
-
-package edu.cmu.sphinx.alignment;
-
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * Minimal {@link TextTokenizer} that normalises typographic punctuation, lower-cases
- * the text and splits it at sentence punctuation.
- */
-public class SimpleTokenizer implements TextTokenizer {
-    /**
-     * Cleans the text and splits it into fragments.
-     *
-     * @param text Input text
-     * @return the fragments between the characters {@code . , ? : ! ; ( )}; they are
-     *         returned as split, without trimming
-     */
-    @Override
-    public List<String> expand(String text) {
-
-        // Typographic apostrophe becomes a plain one; quotation marks, the em dash
-        // and the ellipsis become spaces; the en dash becomes a hyphen.
-        text = text.replace('’', '\'');
-        text = text.replace('‘', ' ');
-        text = text.replace('”', ' ');
-        text = text.replace('“', ' ');
-        text = text.replace('"', ' ');
-        text = text.replace('»', ' ');
-        text = text.replace('«', ' ');
-        text = text.replace('–', '-');
-        text = text.replace('—', ' ');
-        text = text.replace('…', ' ');
-
-        // A hyphen surrounded by spaces is a dash, not part of a word.
-        text = text.replace(" - ", " ");
-        text = text.replaceAll("[/_*%]", " ");
-        // Uses the default locale, as before.
-        text = text.toLowerCase();
-
-        String[] tokens = text.split("[.,?:!;()]");
-        return Arrays.asList(tokens);
-    }
-}
diff --git a/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/TextTokenizer.java b/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/TextTokenizer.java
deleted file mode 100755
index 53dd57d4..00000000
--- a/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/TextTokenizer.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright 2014 Alpha Cephei Inc.
- * All Rights Reserved. Use is subject to license terms.
- *
- * See the file "license.terms" for information on usage and
- * redistribution of this file, and for a DISCLAIMER OF ALL
- * WARRANTIES.
- *
- */
-
-package edu.cmu.sphinx.alignment;
-
-import java.util.List;
-
-/**
- * Turns raw input text into a list of cleaned text lines.
- */
-public interface TextTokenizer {
-
-
-    /**
-     * Cleans the text and returns the list of lines
-     *
-     * @param text Input text
-     * @return a list of lines in the text.
-     */
-    List<String> expand(String text);
-}
diff --git a/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/Token.java b/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/Token.java
deleted file mode 100755
index 1f033ac8..00000000
--- a/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/Token.java
+++ /dev/null
@@ -1,158 +0,0 @@
-/**
- * Portions Copyright 2001 Sun Microsystems, Inc.
- * Portions Copyright 1999-2001 Language Technologies Institute,
- * Carnegie Mellon University.
- * All Rights Reserved. Use is subject to license terms.
- *
- * See the file "license.terms" for information on usage and
- * redistribution of this file, and for a DISCLAIMER OF ALL
- * WARRANTIES.
- */
-package edu.cmu.sphinx.alignment;
-
-/**
- * Contains a parsed token from a Tokenizer.
- */
-public class Token {
-
-    private String token = null;
-    private String whitespace = null;
-    private String prepunctuation = null;
-    private String postpunctuation = null;
-    private int position = 0; // position in the original input text
-    private int lineNumber = 0;
-
-    /**
-     * Returns the whitespace characters of this Token.
-     *
-     * @return the whitespace characters of this Token; null if this Token does
-     *         not use whitespace characters
-     */
-    public String getWhitespace() {
-        return whitespace;
-    }
-
-    /**
-     * Returns the prepunctuation characters of this Token.
-     *
-     * @return the prepunctuation characters of this Token; null if this Token
-     *         does not use prepunctuation characters
-     */
-    public String getPrepunctuation() {
-        return prepunctuation;
-    }
-
-    /**
-     * Returns the postpunctuation characters of this Token.
-     *
-     * @return the postpunctuation characters of this Token; null if this Token
-     *         does not use postpunctuation characters
-     */
-    public String getPostpunctuation() {
-        return postpunctuation;
-    }
-
-    /**
-     * Returns the position of this token in the original input text.
-     *
-     * @return the position of this token in the original input text
-     */
-    public int getPosition() {
-        return position;
-    }
-
-    /**
-     * Returns the line of this token in the original text.
-     *
-     * @return the line of this token in the original text
-     */
-    public int getLineNumber() {
-        return lineNumber;
-    }
-
-    /**
-     * Sets the whitespace characters of this Token.
-     *
-     * @param whitespace the whitespace character for this token
-     */
-    public void setWhitespace(String whitespace) {
-        this.whitespace = whitespace;
-    }
-
-    /**
-     * Sets the prepunctuation characters of this Token.
-     *
-     * @param prepunctuation the prepunctuation characters
-     */
-    public void setPrepunctuation(String prepunctuation) {
-        this.prepunctuation = prepunctuation;
-    }
-
-    /**
-     * Sets the postpunctuation characters of this Token.
-     *
-     * @param postpunctuation the postpunctuation characters
-     */
-    public void setPostpunctuation(String postpunctuation) {
-        this.postpunctuation = postpunctuation;
-    }
-
-    /**
-     * Sets the position of the token in the original input text.
-     *
-     * @param position the position of the input text
-     */
-    public void setPosition(int position) {
-        this.position = position;
-    }
-
-    /**
-     * Set the line of this token in the original text.
-     *
-     * @param lineNumber the line of this token in the original text
-     */
-    public void setLineNumber(int lineNumber) {
-        this.lineNumber = lineNumber;
-    }
-
-    /**
-     * Returns the string associated with this token.
-     *
-     * @return the token if it exists; otherwise null
-     */
-    public String getWord() {
-        return token;
-    }
-
-    /**
-     * Sets the string of this Token.
-     *
-     * @param word the word for this token
-     */
-    public void setWord(String word) {
-        token = word;
-    }
-
-    /**
-     * Converts this token to a string: whitespace, prepunctuation, word and
-     * postpunctuation in that order, leaving out any part that is null.
-     *
-     * @return the string representation of this object
-     */
-    @Override
-    public String toString() {
-        // The buffer never leaves this method, so the unsynchronized builder suffices.
-        StringBuilder fullToken = new StringBuilder();
-
-        if (whitespace != null) {
-            fullToken.append(whitespace);
-        }
-        if (prepunctuation != null) {
-            fullToken.append(prepunctuation);
-        }
-        if (token != null) {
-            fullToken.append(token);
-        }
-        if (postpunctuation != null) {
-            fullToken.append(postpunctuation);
-        }
-        return fullToken.toString();
-    }
-}
diff --git a/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/USEnglishTokenizer.java b/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/USEnglishTokenizer.java
deleted file mode 100755
index 6dd6a1dd..00000000
--- a/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/USEnglishTokenizer.java
+++ /dev/null
@@ -1,1084 +0,0 @@
-/**
- * Portions Copyright 2001-2003 Sun Microsystems, Inc.
- * Portions Copyright 1999-2001 Language Technologies Institute,
- * Carnegie Mellon University.
- * All Rights Reserved. Use is subject to license terms.
- *
- * See the file "license.terms" for information on usage and
- * redistribution of this file, and for a DISCLAIMER OF ALL
- * WARRANTIES.
- */
-package edu.cmu.sphinx.alignment;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import edu.cmu.sphinx.alignment.tokenizer.CharTokenizer;
-import edu.cmu.sphinx.alignment.tokenizer.DecisionTree;
-import edu.cmu.sphinx.alignment.tokenizer.FeatureSet;
-import edu.cmu.sphinx.alignment.tokenizer.Item;
-import edu.cmu.sphinx.alignment.tokenizer.NumberExpander;
-import edu.cmu.sphinx.alignment.tokenizer.PrefixFSM;
-import edu.cmu.sphinx.alignment.tokenizer.PronounceableFSM;
-import edu.cmu.sphinx.alignment.tokenizer.Relation;
-import edu.cmu.sphinx.alignment.tokenizer.SuffixFSM;
-import edu.cmu.sphinx.alignment.tokenizer.Utterance;
-import edu.cmu.sphinx.alignment.tokenizer.WordRelation;
-
-/**
- * Provides the definitions for US English whitespace, punctuations,
- * prepunctuation, and postpunctuation symbols. It also contains a set of
- * Regular Expressions for the US English language. With regular expressions,
- * it specifies what are whitespace, letters in the alphabet, uppercase and
- * lowercase letters, alphanumeric characters, identifiers, integers, doubles,
- * digits, and 'comma and int'.
- *
- * It translates the following code from flite: src/regex/cst_regex.c
- * lang/usenglish/us_text.c
- */
-class UsEnglish {
- /** default whitespace regular expression pattern */
- public static final String RX_DEFAULT_US_EN_WHITESPACE = "[ \n\t\r]+";
- /** default letter regular expression pattern */
- public static final String RX_DEFAULT_US_EN_ALPHABET = "[A-Za-z]+";
- /** default uppercase regular expression pattern */
- public static final String RX_DEFAULT_US_EN_UPPERCASE = "[A-Z]+";
- /** default lowercase regular expression pattern */
- public static final String RX_DEFAULT_US_EN_LOWERCASE = "[a-z]+";
- /** default alpha-numeric regular expression pattern */
- public static final String RX_DEFAULT_US_EN_ALPHANUMERIC = "[0-9A-Za-z]+";
- /** default identifier regular expression pattern */
- public static final String RX_DEFAULT_US_EN_IDENTIFIER =
- "[A-Za-z_][0-9A-Za-z_]+";
- /** default integer regular expression pattern */
- public static final String RX_DEFAULT_US_EN_INT = "-?[0-9]+";
- /** default double regular expression pattern */
- public static final String RX_DEFAULT_US_EN_DOUBLE =
- "-?(([0-9]+\\.[0-9]*)|([0-9]+)|(\\.[0-9]+))([eE][---+]?[0-9]+)?";
- /** default integer with commas regular expression pattern */
- public static final String RX_DEFAULT_US_EN_COMMAINT =
- "[0-9][0-9]?[0-9]?[,']([0-9][0-9][0-9][,'])*[0-9][0-9][0-9](\\.[0-9]+)?";
- /** default digits regular expression pattern */
- public static final String RX_DEFAULT_US_EN_DIGITS = "[0-9][0-9]*";
- /** default dotted abbreviation regular expression pattern */
- public static final String RX_DEFAULT_US_EN_DOTTED_ABBREV =
- "([A-Za-z]\\.)*[A-Za-z]";
- /** default ordinal number regular expression pattern */
- public static final String RX_DEFAULT_US_EN_ORDINAL_NUMBER =
- "[0-9][0-9,]*(th|TH|st|ST|nd|ND|rd|RD)";
- /** default has-vowel regular expression */
- public static final String RX_DEFAULT_HAS_VOWEL = ".*[aeiouAEIOU].*";
- /** default US money regular expression */
- public static final String RX_DEFAULT_US_MONEY = "\\$[0-9,]+(\\.[0-9]+)?";
- /** default -illion regular expression */
- public static final String RX_DEFAULT_ILLION = ".*illion";
- /** default digits2dash (e.g. 999-999-999) regular expression */
- public static final String RX_DEFAULT_DIGITS2DASH =
- "[0-9]+(-[0-9]+)(-[0-9]+)+";
- /** default digits/digits (e.g. 999/999) regular expression */
- public static final String RX_DEFAULT_DIGITSSLASHDIGITS = "[0-9]+/[0-9]+";
- /** default number time regular expression */
- public static final String RX_DEFAULT_NUMBER_TIME =
- "((0[0-2])|(1[0-9])):([0-5][0-9])";
- /** default Roman numerals regular expression */
- public static final String RX_DEFAULT_ROMAN_NUMBER =
- "(II?I?|IV|VI?I?I?|IX|X[VIX]*)";
- /** default drst "Dr. St" regular expression */
- public static final String RX_DEFAULT_DRST = "([dD][Rr]|[Ss][Tt])";
- /** default numess */
- public static final String RX_DEFAULT_NUMESS = "[0-9]+s";
- /** default 7-digit phone number */
- public static final String RX_DEFAULT_SEVEN_DIGIT_PHONE_NUMBER =
- "[0-9][0-9][0-9]-[0-9][0-9][0-9][0-9]";
- /** default 4-digit number */
- public static final String RX_DEFAULT_FOUR_DIGIT = "[0-9][0-9][0-9][0-9]";
- /** default 3-digit number */
- public static final String RX_DEFAULT_THREE_DIGIT = "[0-9][0-9][0-9]";
-
- /** whitespace regular expression pattern */
- public static String RX_WHITESPACE = RX_DEFAULT_US_EN_WHITESPACE;
- /** letter regular expression pattern */
- public static String RX_ALPHABET = RX_DEFAULT_US_EN_ALPHABET;
- /** uppercase regular expression pattern */
- public static String RX_UPPERCASE = RX_DEFAULT_US_EN_UPPERCASE;
- /** lowercase regular expression pattern */
- public static String RX_LOWERCASE = RX_DEFAULT_US_EN_LOWERCASE;
- /** alphanumeric regular expression pattern */
- public static String RX_ALPHANUMERIC = RX_DEFAULT_US_EN_ALPHANUMERIC;
- /** identifier regular expression pattern */
- public static String RX_IDENTIFIER = RX_DEFAULT_US_EN_IDENTIFIER;
- /** integer regular expression pattern */
- public static String RX_INT = RX_DEFAULT_US_EN_INT;
- /** double regular expression pattern */
- public static String RX_DOUBLE = RX_DEFAULT_US_EN_DOUBLE;
- /** comma separated integer regular expression pattern */
- public static String RX_COMMAINT = RX_DEFAULT_US_EN_COMMAINT;
- /** digits regular expression pattern */
- public static String RX_DIGITS = RX_DEFAULT_US_EN_DIGITS;
- /** dotted abbreviation regular expression pattern */
- public static String RX_DOTTED_ABBREV = RX_DEFAULT_US_EN_DOTTED_ABBREV;
- /** ordinal number regular expression pattern */
- public static String RX_ORDINAL_NUMBER = RX_DEFAULT_US_EN_ORDINAL_NUMBER;
- /** has-vowel regular expression */
- public static final String RX_HAS_VOWEL = RX_DEFAULT_HAS_VOWEL;
- /** US money regular expression */
- public static final String RX_US_MONEY = RX_DEFAULT_US_MONEY;
- /** -illion regular expression */
- public static final String RX_ILLION = RX_DEFAULT_ILLION;
- /** digits2dash (e.g. 999-999-999) regular expression */
- public static final String RX_DIGITS2DASH = RX_DEFAULT_DIGITS2DASH;
- /** digits/digits (e.g. 999/999) regular expression */
- public static final String RX_DIGITSSLASHDIGITS =
- RX_DEFAULT_DIGITSSLASHDIGITS;
- /** number time regular expression */
- public static final String RX_NUMBER_TIME = RX_DEFAULT_NUMBER_TIME;
- /** Roman numerals regular expression */
- public static final String RX_ROMAN_NUMBER = RX_DEFAULT_ROMAN_NUMBER;
- /** drst "Dr. St" regular expression */
- public static final String RX_DRST = RX_DEFAULT_DRST;
- /** default numess */
- public static final String RX_NUMESS = RX_DEFAULT_NUMESS;
- /** 7-digit phone number */
- public static final String RX_SEVEN_DIGIT_PHONE_NUMBER =
- RX_DEFAULT_SEVEN_DIGIT_PHONE_NUMBER;
- /** 4-digit number */
- public static final String RX_FOUR_DIGIT = RX_DEFAULT_FOUR_DIGIT;
- /** 3-digit number */
- public static final String RX_THREE_DIGIT = RX_DEFAULT_THREE_DIGIT;
-
- // the following symbols are from lang/usenglish/us_text.c
-
- /** punctuation regular expression pattern */
- public static final String PUNCTUATION_SYMBOLS = "\"'`.,:;!?(){}[]";
- /** pre-punctuation regular expression pattern */
- public static final String PREPUNCTUATION_SYMBOLS = "\"'`({[";
- /** single char symbols regular expression pattern */
- public static final String SINGLE_CHAR_SYMBOLS = "";
- /** whitespace symbols regular expression pattern */
- public static final String WHITESPACE_SYMBOLS = " \t\n\r";
-
- /**
- * Not constructable
- */
- private UsEnglish() {}
-}
-
-
-/**
- * Converts the Tokens (in US English words) in an Utterance into a list of
- * words. It puts the produced list back into the Utterance. Usually, the
- * tokens that gets expanded are numbers like "23" (to "twenty" "three").
- *
- * * It translates the following code from flite:
- *
- * lang/usenglish/us_text.c
- *
- */
-public class USEnglishTokenizer implements TextTokenizer {
- // Patterns for regular expression matching
- private static final Pattern alphabetPattern;
- private static final Pattern commaIntPattern;
- private static final Pattern digits2DashPattern;
- private static final Pattern digitsPattern;
- private static final Pattern digitsSlashDigitsPattern;
- private static final Pattern dottedAbbrevPattern;
- private static final Pattern doublePattern;
- private static final Pattern drStPattern;
- private static final Pattern fourDigitsPattern;
- private static final Pattern illionPattern;
- private static final Pattern numberTimePattern;
- private static final Pattern numessPattern;
- private static final Pattern ordinalPattern;
- private static final Pattern romanNumbersPattern;
- private static final Pattern sevenPhoneNumberPattern;
- private static final Pattern threeDigitsPattern;
- private static final Pattern usMoneyPattern;
-
- static {
- alphabetPattern = Pattern.compile(UsEnglish.RX_ALPHABET);
- commaIntPattern = Pattern.compile(UsEnglish.RX_COMMAINT);
- digits2DashPattern = Pattern.compile(UsEnglish.RX_DIGITS2DASH);
- digitsPattern = Pattern.compile(UsEnglish.RX_DIGITS);
- digitsSlashDigitsPattern =
- Pattern.compile(UsEnglish.RX_DIGITSSLASHDIGITS);
- dottedAbbrevPattern = Pattern.compile(UsEnglish.RX_DOTTED_ABBREV);
- doublePattern = Pattern.compile(UsEnglish.RX_DOUBLE);
- drStPattern = Pattern.compile(UsEnglish.RX_DRST);
- fourDigitsPattern = Pattern.compile(UsEnglish.RX_FOUR_DIGIT);
- Pattern.compile(UsEnglish.RX_HAS_VOWEL);
- illionPattern = Pattern.compile(UsEnglish.RX_ILLION);
- numberTimePattern = Pattern.compile(UsEnglish.RX_NUMBER_TIME);
- numessPattern = Pattern.compile(UsEnglish.RX_NUMESS);
- ordinalPattern = Pattern.compile(UsEnglish.RX_ORDINAL_NUMBER);
- romanNumbersPattern = Pattern.compile(UsEnglish.RX_ROMAN_NUMBER);
- sevenPhoneNumberPattern =
- Pattern.compile(UsEnglish.RX_SEVEN_DIGIT_PHONE_NUMBER);
- threeDigitsPattern = Pattern.compile(UsEnglish.RX_THREE_DIGIT);
- usMoneyPattern = Pattern.compile(UsEnglish.RX_US_MONEY);
- }
-
- // King-like words
- private static final String[] kingNames = {"louis", "henry", "charles",
- "philip", "george", "edward", "pius", "william", "richard",
- "ptolemy", "john", "paul", "peter", "nicholas", "frederick",
- "james", "alfonso", "ivan", "napoleon", "leo", "gregory",
- "catherine", "alexandria", "pierre", "elizabeth", "mary", "elmo",
- "erasmus"};
-
- private static final String[] kingTitles = {"king", "queen", "pope",
- "duke", "tsar", "emperor", "shah", "caesar", "duchess", "tsarina",
- "empress", "baron", "baroness", "sultan", "count", "countess"};
-
- // Section-like words
- private static final String[] sectionTypes = {"section", "chapter",
- "part", "phrase", "verse", "scene", "act", "book", "volume",
- "chap", "war", "apollo", "trek", "fortran"};
-
- /**
- * Here we use a map for constant time matching, instead of using if
- * (A.equals(B) || A.equals(C) || ...) to match Strings
- */
- private static Map kingSectionLikeMap = new HashMap();
-
- private static final String KING_NAMES = "kingNames";
- private static final String KING_TITLES = "kingTitles";
- private static final String SECTION_TYPES = "sectionTypes";
-
- static {
- for (int i = 0; i < kingNames.length; i++) {
- kingSectionLikeMap.put(kingNames[i], KING_NAMES);
- }
- for (int i = 0; i < kingTitles.length; i++) {
- kingSectionLikeMap.put(kingTitles[i], KING_TITLES);
- }
- for (int i = 0; i < sectionTypes.length; i++) {
- kingSectionLikeMap.put(sectionTypes[i], SECTION_TYPES);
- }
- }
-
-
- // Finite state machines to check if a Token is pronounceable
- private PronounceableFSM prefixFSM = null;
- private PronounceableFSM suffixFSM = null;
-
- // List of US states abbreviations and their full names
- private static final String[][] usStates = {
- {"AL", "ambiguous", "alabama"}, {"Al", "ambiguous", "alabama"},
- {"Ala", "", "alabama"}, {"AK", "", "alaska"},
- {"Ak", "", "alaska"}, {"AZ", "", "arizona"},
- {"Az", "", "arizona"}, {"CA", "", "california"},
- {"Ca", "", "california"}, {"Cal", "ambiguous", "california"},
- {"Calif", "", "california"}, {"CO", "ambiguous", "colorado"},
- {"Co", "ambiguous", "colorado"}, {"Colo", "", "colorado"},
- {"DC", "", "d", "c"}, {"DE", "", "delaware"},
- {"De", "ambiguous", "delaware"}, {"Del", "ambiguous", "delaware"},
- {"FL", "", "florida"}, {"Fl", "ambiguous", "florida"},
- {"Fla", "", "florida"}, {"GA", "", "georgia"},
- {"Ga", "", "georgia"}, {"HI", "ambiguous", "hawaii"},
- {"Hi", "ambiguous", "hawaii"}, {"IA", "", "iowa"},
- {"Ia", "ambiguous", "iowa"}, {"IN", "ambiguous", "indiana"},
- {"In", "ambiguous", "indiana"}, {"Ind", "ambiguous", "indiana"},
- {"ID", "ambiguous", "idaho"}, {"IL", "ambiguous", "illinois"},
- {"Il", "ambiguous", "illinois"}, {"ILL", "ambiguous", "illinois"},
- {"KS", "", "kansas"}, {"Ks", "", "kansas"},
- {"Kans", "", "kansas"}, {"KY", "ambiguous", "kentucky"},
- {"Ky", "ambiguous", "kentucky"}, {"LA", "ambiguous", "louisiana"},
- {"La", "ambiguous", "louisiana"},
- {"Lou", "ambiguous", "louisiana"},
- {"Lous", "ambiguous", "louisiana"},
- {"MA", "ambiguous", "massachusetts"},
- {"Mass", "ambiguous", "massachusetts"},
- {"Ma", "ambiguous", "massachusetts"},
- {"MD", "ambiguous", "maryland"}, {"Md", "ambiguous", "maryland"},
- {"ME", "ambiguous", "maine"}, {"Me", "ambiguous", "maine"},
- {"MI", "", "michigan"}, {"Mi", "ambiguous", "michigan"},
- {"Mich", "ambiguous", "michigan"},
- {"MN", "ambiguous", "minnestota"},
- {"Minn", "ambiguous", "minnestota"},
- {"MS", "ambiguous", "mississippi"},
- {"Miss", "ambiguous", "mississippi"},
- {"MT", "ambiguous", "montanna"}, {"Mt", "ambiguous", "montanna"},
- {"MO", "ambiguous", "missouri"}, {"Mo", "ambiguous", "missouri"},
- {"NC", "ambiguous", "north", "carolina"},
- {"ND", "ambiguous", "north", "dakota"},
- {"NE", "ambiguous", "nebraska"}, {"Ne", "ambiguous", "nebraska"},
- {"Neb", "ambiguous", "nebraska"},
- {"NH", "ambiguous", "new", "hampshire"}, {"NV", "", "nevada"},
- {"Nev", "", "nevada"}, {"NY", "", "new", "york"},
- {"OH", "ambiguous", "ohio"}, {"OK", "ambiguous", "oklahoma"},
- {"Okla", "", "oklahoma"}, {"OR", "ambiguous", "oregon"},
- {"Or", "ambiguous", "oregon"}, {"Ore", "ambiguous", "oregon"},
- {"PA", "ambiguous", "pennsylvania"},
- {"Pa", "ambiguous", "pennsylvania"},
- {"Penn", "ambiguous", "pennsylvania"},
- {"RI", "ambiguous", "rhode", "island"},
- {"SC", "ambiguous", "south", "carlolina"},
- {"SD", "ambiguous", "south", "dakota"},
- {"TN", "ambiguous", "tennesee"}, {"Tn", "ambiguous", "tennesee"},
- {"Tenn", "ambiguous", "tennesee"}, {"TX", "ambiguous", "texas"},
- {"Tx", "ambiguous", "texas"}, {"Tex", "ambiguous", "texas"},
- {"UT", "ambiguous", "utah"}, {"VA", "ambiguous", "virginia"},
- {"WA", "ambiguous", "washington"},
- {"Wa", "ambiguous", "washington"},
- {"Wash", "ambiguous", "washington"},
- {"WI", "ambiguous", "wisconsin"},
- {"Wi", "ambiguous", "wisconsin"},
- {"WV", "ambiguous", "west", "virginia"},
- {"WY", "ambiguous", "wyoming"}, {"Wy", "ambiguous", "wyoming"},
- {"Wyo", "", "wyoming"}, {"PR", "ambiguous", "puerto", "rico"}};
-
- // Again map for constant time searching.
- private static Map usStatesMap = new HashMap();
- static {
- for (int i = 0; i < usStates.length; i++) {
- usStatesMap.put(usStates[i][0], usStates[i]);
- }
- };
-
- // class variables
-
- // the word relation that we are building
- private WordRelation wordRelation;
-
- // the current token Item
- private Item tokenItem;
-
- // a CART for classifying numbers
- private DecisionTree cart;
-
- /**
- * Constructs a default USTokenWordProcessor. It uses the USEnglish regular
- * expression set (USEngRegExp) by default.
- */
- public USEnglishTokenizer() {
- try {
- cart = new DecisionTree(getClass().getResource("nums_cart.txt"));
- prefixFSM =
- new PrefixFSM(getClass().getResource("prefix_fsm.txt"));
- suffixFSM =
- new SuffixFSM(getClass().getResource("suffix_fsm.txt"));
- } catch (IOException e) {
- throw new IllegalStateException("resources not found", e);
- }
- }
-
- /**
- * Returns the currently processing token Item.
- *
- * @return the current token Item; null if no item
- */
- public Item getTokenItem() {
- return tokenItem;
- }
-
- /**
- * Process the utterance
- *
- * @param text the utterance containing the tokens
- * @return the list of tokens
- */
- public List expand(String text) {
-
- String simplifiedText = simplifyChars(text);
-
- CharTokenizer tokenizer = new CharTokenizer();
- tokenizer.setWhitespaceSymbols(UsEnglish.WHITESPACE_SYMBOLS);
- tokenizer.setSingleCharSymbols(UsEnglish.SINGLE_CHAR_SYMBOLS);
- tokenizer.setPrepunctuationSymbols(UsEnglish.PREPUNCTUATION_SYMBOLS);
- tokenizer.setPostpunctuationSymbols(UsEnglish.PUNCTUATION_SYMBOLS);
- tokenizer.setInputText(simplifiedText);
- Utterance utterance = new Utterance(tokenizer);
-
- Relation tokenRelation;
- if ((tokenRelation = utterance.getRelation(Relation.TOKEN)) == null) {
- throw new IllegalStateException("token relation does not exist");
- }
-
- wordRelation = WordRelation.createWordRelation(utterance, this);
-
- for (tokenItem = tokenRelation.getHead(); tokenItem != null; tokenItem =
- tokenItem.getNext()) {
-
- FeatureSet featureSet = tokenItem.getFeatures();
- String tokenVal = featureSet.getString("name");
-
- // convert the token into a list of words
- tokenToWords(tokenVal);
- }
-
- List words = new ArrayList();
- for (Item item = utterance.getRelation(Relation.WORD).getHead(); item != null; item =
- item.getNext()) {
- if (!item.toString().isEmpty() && !item.toString().contains("#")) {
- words.add(item.toString());
- }
- }
- return words;
- }
-
- private String simplifyChars(String text) {
- text = text.replace('’', '\'');
- text = text.replace('‘', '\'');
- text = text.replace('”', '"');
- text = text.replace('“', '"');
- text = text.replace('»', '"');
- text = text.replace('«', '"');
- text = text.replace('–', '-');
- text = text.replace('—', ' ');
- text = text.replace('…', ' ');
- text = text.replace((char)0xc, ' ');
- return text;
- }
-
- /**
- * Returns true if the given token matches part of a phone number
- *
- * @param tokenItem the token
- * @param tokenVal the string value of the token
- *
- * @return true or false
- */
- private boolean matchesPartPhoneNumber(String tokenVal) {
-
- String n_name = (String) tokenItem.findFeature("n.name");
- String n_n_name = (String) tokenItem.findFeature("n.n.name");
- String p_name = (String) tokenItem.findFeature("p.name");
- String p_p_name = (String) tokenItem.findFeature("p.p.name");
-
- boolean matches3DigitsP_name = matches(threeDigitsPattern, p_name);
-
- return ((matches(threeDigitsPattern, tokenVal) && ((!matches(
- digitsPattern, p_name) && matches(threeDigitsPattern, n_name) && matches(
- fourDigitsPattern, n_n_name))
- || (matches(sevenPhoneNumberPattern, n_name)) || (!matches(
- digitsPattern, p_p_name) && matches3DigitsP_name && matches(
- fourDigitsPattern, n_name)))) || (matches(
- fourDigitsPattern, tokenVal) && (!matches(digitsPattern,
- n_name) && matches3DigitsP_name && matches(threeDigitsPattern,
- p_p_name))));
- }
-
-
- /**
- * Converts the given Token into (word) Items in the WordRelation.
- *
- * @param tokenVal the String value of the token, which may or may not be
- * same as the one in called "name" in flite
- *
- */
- private void tokenToWords(String tokenVal) {
- FeatureSet tokenFeatures = tokenItem.getFeatures();
- String itemName = tokenFeatures.getString("name");
- int tokenLength = tokenVal.length();
-
- if (tokenFeatures.isPresent("phones")) {
- wordRelation.addWord(tokenVal);
-
- } else if ((tokenVal.equals("a") || tokenVal.equals("A"))
- && ((tokenItem.getNext() == null)
- || !(tokenVal.equals(itemName)) || !(((String) tokenItem
- .findFeature("punc")).equals("")))) {
- /* if A is a sub part of a token, then its ey not ah */
- wordRelation.addWord("_a");
-
- } else if (matches(alphabetPattern, tokenVal)) {
-
- if (matches(romanNumbersPattern, tokenVal)) {
-
- /* XVIII */
- romanToWords(tokenVal);
-
- } else if (matches(illionPattern, tokenVal)
- && matches(usMoneyPattern,
- (String) tokenItem.findFeature("p.name"))) {
- /* $ X -illion */
- wordRelation.addWord(tokenVal);
- wordRelation.addWord("dollars");
-
- } else if (matches(drStPattern, tokenVal)) {
- /* St Andrew's St, Dr King Dr */
- drStToWords(tokenVal);
- } else if (tokenVal.equals("Mr")) {
- tokenItem.getFeatures().setString("punc", "");
- wordRelation.addWord("mister");
- } else if (tokenVal.equals("Mrs")) {
- tokenItem.getFeatures().setString("punc", "");
- wordRelation.addWord("missus");
- } else if (tokenLength == 1
- && Character.isUpperCase(tokenVal.charAt(0))
- && ((String) tokenItem.findFeature("n.whitespace"))
- .equals(" ")
- && Character.isUpperCase(((String) tokenItem
- .findFeature("n.name")).charAt(0))) {
-
- tokenFeatures.setString("punc", "");
- String aaa = tokenVal.toLowerCase();
- if (aaa.equals("a")) {
- wordRelation.addWord("_a");
- } else {
- wordRelation.addWord(aaa);
- }
- } else if (isStateName(tokenVal)) {
- /*
- * The name of a US state isStateName() has already added the
- * full name of the state, so we're all set.
- */
- } else if (tokenLength > 1 && !isPronounceable(tokenVal)) {
- /* Need common exception list */
- /* unpronouncable list of alphas */
- NumberExpander.expandLetters(tokenVal, wordRelation);
-
- } else {
- /* just a word */
- wordRelation.addWord(tokenVal.toLowerCase());
- }
-
- } else if (matches(dottedAbbrevPattern, tokenVal)) {
-
- /* U.S.A. */
- // remove all dots
- NumberExpander.expandLetters(tokenVal.replace(".", ""),
- wordRelation);
-
- } else if (matches(commaIntPattern, tokenVal)) {
-
- /* 99,999,999 */
- NumberExpander.expandReal(tokenVal.replace(",", "").replace("'", ""), wordRelation);
-
- } else if (matches(sevenPhoneNumberPattern, tokenVal)) {
-
- /* 234-3434 telephone numbers */
- int dashIndex = tokenVal.indexOf('-');
- String aaa = tokenVal.substring(0, dashIndex);
- String bbb = tokenVal.substring(dashIndex + 1);
-
- NumberExpander.expandDigits(aaa, wordRelation);
- wordRelation.addBreak();
- NumberExpander.expandDigits(bbb, wordRelation);
-
- } else if (matchesPartPhoneNumber(tokenVal)) {
-
- /* part of a telephone number */
- String punctuation = (String) tokenItem.findFeature("punc");
- if (punctuation.equals("")) {
- tokenItem.getFeatures().setString("punc", ",");
- }
- NumberExpander.expandDigits(tokenVal, wordRelation);
- wordRelation.addBreak();
-
- } else if (matches(numberTimePattern, tokenVal)) {
- /* 12:35 */
- int colonIndex = tokenVal.indexOf(':');
- String aaa = tokenVal.substring(0, colonIndex);
- String bbb = tokenVal.substring(colonIndex + 1);
-
- NumberExpander.expandNumber(aaa, wordRelation);
- if (!(bbb.equals("00"))) {
- NumberExpander.expandID(bbb, wordRelation);
- }
- } else if (matches(digits2DashPattern, tokenVal)) {
- /* 999-999-999 */
- digitsDashToWords(tokenVal);
- } else if (matches(digitsPattern, tokenVal)) {
- digitsToWords(tokenVal);
- } else if (tokenLength == 1
- && Character.isUpperCase(tokenVal.charAt(0))
- && ((String) tokenItem.findFeature("n.whitespace"))
- .equals(" ")
- && Character.isUpperCase(((String) tokenItem
- .findFeature("n.name")).charAt(0))) {
-
- tokenFeatures.setString("punc", "");
- String aaa = tokenVal.toLowerCase();
- if (aaa.equals("a")) {
- wordRelation.addWord("_a");
- } else {
- wordRelation.addWord(aaa);
- }
- } else if (matches(doublePattern, tokenVal)) {
- NumberExpander.expandReal(tokenVal, wordRelation);
- } else if (matches(ordinalPattern, tokenVal)) {
- /* explicit ordinals */
- String aaa = tokenVal.substring(0, tokenLength - 2);
- NumberExpander.expandOrdinal(aaa, wordRelation);
- } else if (matches(usMoneyPattern, tokenVal)) {
- /* US money */
- usMoneyToWords(tokenVal);
- } else if (tokenLength > 0 && tokenVal.charAt(tokenLength - 1) == '%') {
- /* Y% */
- tokenToWords(tokenVal.substring(0, tokenLength - 1));
- wordRelation.addWord("percent");
- } else if (matches(numessPattern, tokenVal)) {
- NumberExpander.expandNumess(tokenVal.substring(0, tokenLength - 1), wordRelation);
- } else if (matches(digitsSlashDigitsPattern, tokenVal)
- && tokenVal.equals(itemName)) {
- digitsSlashDigitsToWords(tokenVal);
- } else if (tokenVal.indexOf('-') != -1) {
- dashToWords(tokenVal);
- } else if (tokenLength > 1 && !matches(alphabetPattern, tokenVal)) {
- notJustAlphasToWords(tokenVal);
- } else if (tokenVal.equals("&")) {
- // &
- wordRelation.addWord("and");
- } else if (tokenVal.equals("-")) {
- // Skip it
- } else {
- // Just a word.
- wordRelation.addWord(tokenVal.toLowerCase());
- }
- }
-
- /**
- * Convert the given digit token with dashes (e.g. 999-999-999) into (word)
- * Items in the WordRelation.
- *
- * @param tokenVal the digit string
- */
- private void digitsDashToWords(String tokenVal) {
- int tokenLength = tokenVal.length();
- int a = 0;
- for (int p = 0; p <= tokenLength; p++) {
- if (p == tokenLength || tokenVal.charAt(p) == '-') {
- String aaa = tokenVal.substring(a, p);
- NumberExpander.expandDigits(aaa, wordRelation);
- wordRelation.addBreak();
- a = p + 1;
- }
- }
- }
-
- /**
- * Convert the given digit token into (word) Items in the WordRelation.
- *
- * @param tokenVal the digit string
- */
- private void digitsToWords(String tokenVal) {
- FeatureSet featureSet = tokenItem.getFeatures();
- String nsw = "";
- if (featureSet.isPresent("nsw")) {
- nsw = featureSet.getString("nsw");
- }
-
- if (nsw.equals("nide")) {
- NumberExpander.expandID(tokenVal, wordRelation);
- } else {
- String rName = featureSet.getString("name");
- String digitsType = null;
-
- if (tokenVal.equals(rName)) {
- digitsType = (String) cart.interpret(tokenItem);
- } else {
- featureSet.setString("name", tokenVal);
- digitsType = (String) cart.interpret(tokenItem);
- featureSet.setString("name", rName);
- }
-
- if (digitsType.equals("ordinal")) {
- NumberExpander.expandOrdinal(tokenVal, wordRelation);
- } else if (digitsType.equals("digits")) {
- NumberExpander.expandDigits(tokenVal, wordRelation);
- } else if (digitsType.equals("year")) {
- NumberExpander.expandID(tokenVal, wordRelation);
- } else {
- NumberExpander.expandNumber(tokenVal, wordRelation);
- }
- }
- }
-
- /**
- * Converts the given Roman numeral string into (word) Items in the
- * WordRelation.
- *
- * @param romanString the roman numeral string
- */
- private void romanToWords(String romanString) {
- String punctuation = (String) tokenItem.findFeature("p.punc");
-
- if (punctuation.equals("")) {
- /* no preceeding punctuation */
- String n = String.valueOf(NumberExpander.expandRoman(romanString));
-
- if (kingLike(tokenItem)) {
- wordRelation.addWord("the");
- NumberExpander.expandOrdinal(n, wordRelation);
- } else if (sectionLike(tokenItem)) {
- NumberExpander.expandNumber(n, wordRelation);
- } else {
- NumberExpander.expandLetters(romanString, wordRelation);
- }
- } else {
- NumberExpander.expandLetters(romanString, wordRelation);
- }
- }
-
- /**
- * Returns true if the given key is in the {@link #kingSectionLikeMap} map,
- * and the value is the same as the given value.
- *
- * @param key key to look for in the map
- * @param value the value to match
- *
- * @return true if it matches, or false if it does not or if the key is not
- * mapped to any value in the map.
- */
- private static boolean inKingSectionLikeMap(String key, String value) {
- if (kingSectionLikeMap.containsKey(key)) {
- return kingSectionLikeMap.get(key).equals(value);
- }
- return false;
- }
-
- /**
- * Returns true if the given token item contains a token that is in a
- * king-like context, e.g., "King" or "Louis".
- *
- * @param tokenItem the token item to check
- *
- * @return true or false
- */
- public static boolean kingLike(Item tokenItem) {
- String kingName =
- ((String) tokenItem.findFeature("p.name")).toLowerCase();
- if (inKingSectionLikeMap(kingName, KING_NAMES)) {
- return true;
- } else {
- String kingTitle =
- ((String) tokenItem.findFeature("p.p.name")).toLowerCase();
- return inKingSectionLikeMap(kingTitle, KING_TITLES);
- }
- }
-
- /**
- * Returns true if the given token item contains a token that is in a
- * section-like context, e.g., "chapter" or "act".
- *
- * @param tokenItem the token item to check
- *
- * @return true or false
- */
- public static boolean sectionLike(Item tokenItem) {
- String sectionType =
- ((String) tokenItem.findFeature("p.name")).toLowerCase();
- return inKingSectionLikeMap(sectionType, SECTION_TYPES);
- }
-
- /**
- * Converts the given string containing "St" and "Dr" to (word) Items in
- * the WordRelation.
- *
- * @param drStString the string with "St" and "Dr"
- */
- private void drStToWords(String drStString) {
- String street = null;
- String saint = null;
- char c0 = drStString.charAt(0);
-
- if (c0 == 's' || c0 == 'S') {
- street = "street";
- saint = "saint";
- } else {
- street = "drive";
- saint = "doctor";
- }
-
- FeatureSet featureSet = tokenItem.getFeatures();
- String punctuation = featureSet.getString("punc");
-
- String featPunctuation = (String) tokenItem.findFeature("punc");
-
- if (tokenItem.getNext() == null || punctuation.indexOf(',') != -1) {
- wordRelation.addWord(street);
- } else if (featPunctuation.equals(",")) {
- wordRelation.addWord(saint);
- } else {
- String pName = (String) tokenItem.findFeature("p.name");
- String nName = (String) tokenItem.findFeature("n.name");
-
- char p0 = pName.charAt(0);
- char n0 = nName.charAt(0);
-
- if (Character.isUpperCase(p0) && Character.isLowerCase(n0)) {
- wordRelation.addWord(street);
- } else if (Character.isDigit(p0) && Character.isLowerCase(n0)) {
- wordRelation.addWord(street);
- } else if (Character.isLowerCase(p0) && Character.isUpperCase(n0)) {
- wordRelation.addWord(saint);
- } else {
- String whitespace =
- (String) tokenItem.findFeature("n.whitespace");
- if (whitespace.equals(" ")) {
- wordRelation.addWord(saint);
- } else {
- wordRelation.addWord(street);
- }
- }
- }
-
- if (punctuation != null && punctuation.equals(".")) {
- featureSet.setString("punc", "");
- }
- }
-
- /**
- * Converts US money string into (word) Items in the WordRelation.
- *
- * @param tokenVal the US money string
- */
- private void usMoneyToWords(String tokenVal) {
- int dotIndex = tokenVal.indexOf('.');
- if (matches(illionPattern, (String) tokenItem.findFeature("n.name"))) {
- NumberExpander.expandReal(tokenVal.substring(1), wordRelation);
- } else if (dotIndex == -1) {
- String aaa = tokenVal.substring(1);
- tokenToWords(aaa);
- if (aaa.equals("1")) {
- wordRelation.addWord("dollar");
- } else {
- wordRelation.addWord("dollars");
- }
- } else if (dotIndex == (tokenVal.length() - 1)
- || (tokenVal.length() - dotIndex) > 3) {
- // Simply read as mumble point mumble.
- NumberExpander.expandReal(tokenVal.substring(1), wordRelation);
- wordRelation.addWord("dollars");
- } else {
- String aaa = tokenVal.substring(1, dotIndex).replace(",", "");
- String bbb = tokenVal.substring(dotIndex + 1);
-
- NumberExpander.expandNumber(aaa, wordRelation);
-
- if (aaa.equals("1")) {
- wordRelation.addWord("dollar");
- } else {
- wordRelation.addWord("dollars");
- }
-
- if (bbb.equals("00")) {
- // Add nothing to the word list.
- } else {
- NumberExpander.expandNumber(bbb, wordRelation);
- if (bbb.equals("01")) {
- wordRelation.addWord("cent");
- } else {
- wordRelation.addWord("cents");
- }
- }
- }
- }
-
- /**
- * Convert the given digits/digits string into word (Items) in the
- * WordRelation.
- *
- * @param tokenVal the digits/digits string
- */
- private void digitsSlashDigitsToWords(String tokenVal) {
-
- /* might be fraction, or not */
- int index = tokenVal.indexOf('/');
- String aaa = tokenVal.substring(0, index);
- String bbb = tokenVal.substring(index + 1);
- int a;
-
- // if the previous token is a number, add an "and"
- if (matches(digitsPattern, (String) tokenItem.findFeature("p.name"))
- && tokenItem.getPrevious() != null) {
- wordRelation.addWord("and");
- }
-
- if (aaa.equals("1") && bbb.equals("2")) {
- wordRelation.addWord("a");
- wordRelation.addWord("half");
- } else if ((a = Integer.parseInt(aaa)) < (Integer.parseInt(bbb))) {
- NumberExpander.expandNumber(aaa, wordRelation);
- NumberExpander.expandOrdinal(bbb, wordRelation);
- if (a > 1) {
- wordRelation.addWord("'s");
- }
- } else {
- NumberExpander.expandNumber(aaa, wordRelation);
- wordRelation.addWord("slash");
- NumberExpander.expandNumber(bbb, wordRelation);
- }
- }
-
- /**
- * Convert the given dashed string (e.g. "aaa-bbb") into (word) Items in
- * the WordRelation.
- *
- * @param tokenVal the dashed string
- */
- private void dashToWords(String tokenVal) {
- int index = tokenVal.indexOf('-');
- String aaa = tokenVal.substring(0, index);
- String bbb = tokenVal.substring(index + 1, tokenVal.length());
-
- if (matches(digitsPattern, aaa) && matches(digitsPattern, bbb)) {
- FeatureSet featureSet = tokenItem.getFeatures();
- featureSet.setString("name", aaa);
- tokenToWords(aaa);
- wordRelation.addWord("to");
- featureSet.setString("name", bbb);
- tokenToWords(bbb);
- featureSet.setString("name", "");
- } else {
- tokenToWords(aaa);
- tokenToWords(bbb);
- }
- }
-
- /**
- * Convert the given string (which does not only consist of alphabet) into
- * (word) Items in the WordRelation.
- *
- * @param tokenVal the string
- */
- private void notJustAlphasToWords(String tokenVal) {
- /* its not just alphas */
- int index = 0;
- int tokenLength = tokenVal.length();
-
- for (; index < tokenLength - 1; index++) {
- if (isTextSplitable(tokenVal, index)) {
- break;
- }
- }
- if (index == tokenLength - 1) {
- wordRelation.addWord(tokenVal.toLowerCase());
- return;
- }
-
- String aaa = tokenVal.substring(0, index + 1);
- String bbb = tokenVal.substring(index + 1, tokenLength);
-
- FeatureSet featureSet = tokenItem.getFeatures();
- featureSet.setString("nsw", "nide");
- tokenToWords(aaa);
- tokenToWords(bbb);
- }
-
- /**
- * Returns true if the given word is pronounceable. This method is
- * originally called us_aswd() in Flite 1.1.
- *
- * @param word the word to test
- *
- * @return true if the word is pronounceable, false otherwise
- */
- public boolean isPronounceable(String word) {
- String lcWord = word.toLowerCase();
- return prefixFSM.accept(lcWord) && suffixFSM.accept(lcWord);
- }
-
- /**
- * Returns true if the given token is the name of a US state. If it is, it
- * will add the name of the state to (word) Items in the WordRelation.
- *
- * @param tokenVal the token string
- */
- private boolean isStateName(String tokenVal) {
- String[] state = (String[]) usStatesMap.get(tokenVal);
- if (state != null) {
- boolean expandState = false;
-
- // check to see if the state initials are ambiguous
- // in the English language
- if (state[1].equals("ambiguous")) {
- String previous = (String) tokenItem.findFeature("p.name");
- String next = (String) tokenItem.findFeature("n.name");
-
- int nextLength = next.length();
- FeatureSet featureSet = tokenItem.getFeatures();
-
- // check if the previous word starts with a capital letter,
- // is at least 3 letters long, is an alphabet sequence,
- // and has a comma.
- boolean previousIsCity =
- (Character.isUpperCase(previous.charAt(0))
- && previous.length() > 2
- && matches(alphabetPattern, previous) && tokenItem
- .findFeature("p.punc").equals(","));
-
- // check if next token starts with a lower case, or
- // this is the end of sentence, or if next token
- // is a period (".") or a zip code (5 or 10 digits).
- boolean nextIsGood =
- (Character.isLowerCase(next.charAt(0))
- || tokenItem.getNext() == null
- || featureSet.getString("punc").equals(".") || ((nextLength == 5 || nextLength == 10) && matches(
- digitsPattern, next)));
-
- if (previousIsCity && nextIsGood) {
- expandState = true;
- } else {
- expandState = false;
- }
- } else {
- expandState = true;
- }
- if (expandState) {
- for (int j = 2; j < state.length; j++) {
- if (state[j] != null) {
- wordRelation.addWord(state[j]);
- }
- }
- return true;
- }
- }
- return false;
- }
-
- /**
- * Determines if the given input matches the given Pattern.
- *
- * @param pattern the pattern to match
- * @param input the string to test
- *
- * @return true if the input string matches the given Pattern;
- * false otherwise
- */
- private static boolean matches(Pattern pattern, String input) {
- Matcher m = pattern.matcher(input);
- return m.matches();
- }
-
- /**
- * Determines if the character at the given position of the given input
- * text is splittable. A character is splittable if:
- *
- * 1) the character and the following character are not letters in the
- * English alphabet (A-Z and a-z)
- *
- * 2) the character and the following character are not digits (0-9)
- *
- *
- * @param text the text containing the character of interest
- * @param index the index of the character of interest
- *
- * @return true if the position of the given text is splittable false
- * otherwise
- */
- private static boolean isTextSplitable(String text, int index) {
-
-
- char c0 = text.charAt(index);
- char c1 = text.charAt(index + 1);
-
- if (Character.isLetter(c0) && Character.isLetter(c1)) {
- return false;
- } else if (Character.isDigit(c0) && Character.isDigit(c1)) {
- return false;
- } else if (c0 == '\'' || Character.isLetter(c1)) {
- return false;
- } else if (c1 == '\'' || Character.isLetter(c0)) {
- return false;
- } else {
- return true;
- }
- }
-}
diff --git a/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/tokenizer/CharTokenizer.java b/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/tokenizer/CharTokenizer.java
deleted file mode 100755
index 980e935a..00000000
--- a/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/tokenizer/CharTokenizer.java
+++ /dev/null
@@ -1,405 +0,0 @@
-/**
- * Portions Copyright 2001 Sun Microsystems, Inc.
- * Portions Copyright 1999-2001 Language Technologies Institute,
- * Carnegie Mellon University.
- * All Rights Reserved. Use is subject to license terms.
- *
- * See the file "license.terms" for information on usage and
- * redistribution of this file, and for a DISCLAIMER OF ALL
- * WARRANTIES.
- */
-package edu.cmu.sphinx.alignment.tokenizer;
-
-import java.io.IOException;
-import java.io.Reader;
-import java.util.Iterator;
-
-import edu.cmu.sphinx.alignment.Token;
-
-/**
- * Implements the tokenizer interface. Breaks an input sequence of characters
- * into a set of tokens.
- */
-public class CharTokenizer implements Iterator {
-
- /** A constant indicating that the end of the stream has been read. */
- public static final int EOF = -1;
-
- /** A string containing the default whitespace characters. */
- public static final String DEFAULT_WHITESPACE_SYMBOLS = " \t\n\r";
-
- /** A string containing the default single characters. */
- public static final String DEFAULT_SINGLE_CHAR_SYMBOLS = "(){}[]";
-
- /** A string containing the default pre-punctuation characters. */
- public static final String DEFAULT_PREPUNCTUATION_SYMBOLS = "\"'`({[";
-
- /** A string containing the default post-punctuation characters. */
- public static final String DEFAULT_POSTPUNCTUATION_SYMBOLS =
- "\"'`.,:;!?(){}[]";
-
- /** The line number. */
- private int lineNumber;
-
- /** The input text (from the Utterance) to tokenize. */
- private String inputText;
-
- /** The file to read input text from, if using file mode. */
- private Reader reader;
-
- /** The current character, whether its from the file or the input text. */
- private int currentChar;
-
- /**
- * The current char position for the input text (not the file) this is
- * called "file_pos" in flite
- */
- private int currentPosition;
-
- /** The delimiting symbols of this tokenizer. */
- private String whitespaceSymbols = DEFAULT_WHITESPACE_SYMBOLS;
- private String singleCharSymbols = DEFAULT_SINGLE_CHAR_SYMBOLS;
- private String prepunctuationSymbols = DEFAULT_PREPUNCTUATION_SYMBOLS;
- private String postpunctuationSymbols = DEFAULT_POSTPUNCTUATION_SYMBOLS;
-
- /** The error description. */
- private String errorDescription;
-
- /** A place to store the current token. */
- private Token token;
- private Token lastToken;
-
- /**
- * Constructs a Tokenizer.
- */
- public CharTokenizer() {}
-
- /**
- * Creates a tokenizer that will return tokens from the given string.
- *
- * @param string the string to tokenize
- */
- public CharTokenizer(String string) {
- setInputText(string);
- }
-
- /**
- * Creates a tokenizer that will return tokens from the given file.
- *
- * @param file where to read the input from
- */
- public CharTokenizer(Reader file) {
- setInputReader(file);
- }
-
- /**
- * Sets the whitespace symbols of this Tokenizer to the given symbols.
- *
- * @param symbols the whitespace symbols
- */
- public void setWhitespaceSymbols(String symbols) {
- whitespaceSymbols = symbols;
- }
-
- /**
- * Sets the single character symbols of this Tokenizer to the given
- * symbols.
- *
- * @param symbols the single character symbols
- */
- public void setSingleCharSymbols(String symbols) {
- singleCharSymbols = symbols;
- }
-
- /**
- * Sets the prepunctuation symbols of this Tokenizer to the given symbols.
- *
- * @param symbols the prepunctuation symbols
- */
- public void setPrepunctuationSymbols(String symbols) {
- prepunctuationSymbols = symbols;
- }
-
- /**
- * Sets the postpunctuation symbols of this Tokenizer to the given symbols.
- *
- * @param symbols the postpunctuation symbols
- */
- public void setPostpunctuationSymbols(String symbols) {
- postpunctuationSymbols = symbols;
- }
-
- /**
- * Sets the text to tokenize.
- *
- * @param inputString the string to tokenize
- */
- public void setInputText(String inputString) {
- inputText = inputString;
- currentPosition = 0;
-
- if (inputText != null) {
- getNextChar();
- }
- }
-
- /**
- * Sets the input reader
- *
- * @param reader the input source
- */
- public void setInputReader(Reader reader) {
- this.reader = reader;
- getNextChar();
- }
-
- /**
- * Returns the next token.
- *
- * @return the next token if it exists, null if no more tokens
- */
- public Token next() {
- lastToken = token;
- token = new Token();
-
- // Skip whitespace
- token.setWhitespace(getTokenOfCharClass(whitespaceSymbols));
-
- // quoted strings currently ignored
-
- // get prepunctuation
- token.setPrepunctuation(getTokenOfCharClass(prepunctuationSymbols));
-
- // get the symbol itself
- if (singleCharSymbols.indexOf(currentChar) != -1) {
- token.setWord(String.valueOf((char) currentChar));
- getNextChar();
- } else {
- token.setWord(getTokenNotOfCharClass(whitespaceSymbols));
- }
-
- token.setPosition(currentPosition);
- token.setLineNumber(lineNumber);
-
- // This'll have token *plus* postpunctuation
- // Get postpunctuation
- removeTokenPostpunctuation();
-
- return token;
- }
-
- /**
- * Returns true if there are more tokens, false
- * otherwise.
- *
- * @return true if there are more tokens false
- * otherwise
- */
- public boolean hasNext() {
- int nextChar = currentChar;
- return (nextChar != EOF);
- }
-
- public void remove() {
- throw new UnsupportedOperationException();
- }
-
- /**
- * Advances the currentPosition pointer by 1 (if not exceeding length of
- * inputText, and returns the character pointed by currentPosition.
- *
- * @return the next character EOF if no more characters exist
- */
- private int getNextChar() {
- if (reader != null) {
- try {
- int readVal = reader.read();
- if (readVal == -1) {
- currentChar = EOF;
- } else {
- currentChar = (char) readVal;
- }
- } catch (IOException ioe) {
- currentChar = EOF;
- errorDescription = ioe.getMessage();
- }
- } else if (inputText != null) {
- if (currentPosition < inputText.length()) {
- currentChar = (int) inputText.charAt(currentPosition);
- } else {
- currentChar = EOF;
- }
- }
- if (currentChar != EOF) {
- currentPosition++;
- }
- if (currentChar == '\n') {
- lineNumber++;
- }
- return currentChar;
- }
-
- /**
- * Starting from the current position of the input text, returns the
- * subsequent characters of type charClass, and not of type
- * singleCharSymbols.
- *
- * @param charClass the type of characters to look for
- * @param buffer the place to append characters of type charClass
- *
- * @return a string of characters starting from the current position of the
- * input text, until it encounters a character not in the string
- * charClass
- *
- */
- private String getTokenOfCharClass(String charClass) {
- return getTokenByCharClass(charClass, true);
- }
-
- /**
- * Starting from the current position of the input text/file, returns the
- * subsequent characters, not of type singleCharSymbols, and ended at
- * characters of type endingCharClass. E.g., if the current string is
- * "xxxxyyy", endingCharClass is "yz", and singleCharClass "abc". Then this
- * method will return to "xxxx".
- *
- * @param endingCharClass the type of characters to look for
- *
- * @return a string of characters from the current position until it
- * encounters characters in endingCharClass
- *
- */
- private String getTokenNotOfCharClass(String endingCharClass) {
- return getTokenByCharClass(endingCharClass, false);
- }
-
- /**
- * Provides a `compressed' method from getTokenOfCharClass() and
- * getTokenNotOfCharClass(). If parameter containThisCharClass is
- * true, then a string from the current position to the last
- * character in charClass is returned. If containThisCharClass is
- * false , then a string before the first occurrence of a
- * character in containThisCharClass is returned.
- *
- * @param charClass the string of characters you want included or excluded
- * in your return
- * @param containThisCharClass determines if you want characters in
- * charClass in the returned string or not
- *
- * @return a string of characters from the current position until it
- * encounters characters in endingCharClass
- */
- private String getTokenByCharClass(String charClass,
- boolean containThisCharClass) {
- final StringBuilder buffer = new StringBuilder();
-
- // if we want the returned string to contain chars in charClass, then
- // containThisCharClass is TRUE and
- // (charClass.indexOf(currentChar) != 1) == containThisCharClass)
- // returns true; if we want it to stop at characters of charClass,
- // then containThisCharClass is FALSE, and the condition returns
- // false.
- while ((charClass.indexOf(currentChar) != -1) == containThisCharClass
- && singleCharSymbols.indexOf(currentChar) == -1
- && currentChar != EOF) {
- buffer.append((char) currentChar);
- getNextChar();
- }
- return buffer.toString();
- }
-
- /**
- * Removes the postpunctuation characters from the current token. Copies
- * those postpunctuation characters to the class variable
- * 'postpunctuation'.
- */
- private void removeTokenPostpunctuation() {
- if (token == null) {
- return;
- }
- final String tokenWord = token.getWord();
-
- int tokenLength = tokenWord.length();
- int position = tokenLength - 1;
-
- while (position > 0
- && postpunctuationSymbols.indexOf((int) tokenWord
- .charAt(position)) != -1) {
- position--;
- }
-
- if (tokenLength - 1 != position) {
- // Copy postpunctuation from token
- token.setPostpunctuation(tokenWord.substring(position + 1));
-
- // truncate token at postpunctuation
- token.setWord(tokenWord.substring(0, position + 1));
- } else {
- token.setPostpunctuation("");
- }
- }
-
- /**
- * Returns true if there were errors while reading tokens
- *
- * @return true if there were errors; false
- * otherwise
- */
- public boolean hasErrors() {
- return errorDescription != null;
- }
-
- /**
- * if hasErrors returns true, this will return a description
- * of the error encountered, otherwise it will return null
- *
- * @return a description of the last error that occurred.
- */
- public String getErrorDescription() {
- return errorDescription;
- }
-
- /**
- * Determines if the current token should start a new sentence.
- *
- * @return true if a new sentence should be started
- */
- public boolean isSentenceSeparator() {
- String tokenWhiteSpace = token.getWhitespace();
- String lastTokenPostpunctuation = null;
- if (lastToken != null) {
- lastTokenPostpunctuation = lastToken.getPostpunctuation();
- }
-
- if (lastToken == null || token == null) {
- return false;
- } else if (tokenWhiteSpace.indexOf('\n') != tokenWhiteSpace
- .lastIndexOf('\n')) {
- return true;
- } else if (lastTokenPostpunctuation.indexOf(':') != -1
- || lastTokenPostpunctuation.indexOf('?') != -1
- || lastTokenPostpunctuation.indexOf('!') != -1) {
- return true;
- } else if (lastTokenPostpunctuation.indexOf('.') != -1
- && tokenWhiteSpace.length() > 1
- && Character.isUpperCase(token.getWord().charAt(0))) {
- return true;
- } else {
- String lastWord = lastToken.getWord();
- int lastWordLength = lastWord.length();
-
- if (lastTokenPostpunctuation.indexOf('.') != -1
- &&
- /* next word starts with a capital */
- Character.isUpperCase(token.getWord().charAt(0))
- &&
- /* last word isn't an abbreviation */
- !(Character.isUpperCase(lastWord
- .charAt(lastWordLength - 1)) || (lastWordLength < 4 && Character
- .isUpperCase(lastWord.charAt(0))))) {
- return true;
- }
- }
- return false;
- }
-}
diff --git a/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/tokenizer/DecisionTree.java b/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/tokenizer/DecisionTree.java
deleted file mode 100755
index c8ea23e4..00000000
--- a/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/tokenizer/DecisionTree.java
+++ /dev/null
@@ -1,608 +0,0 @@
-/**
- * Portions Copyright 2001 Sun Microsystems, Inc.
- * Portions Copyright 1999-2001 Language Technologies Institute,
- * Carnegie Mellon University.
- * All Rights Reserved. Use is subject to license terms.
- *
- * See the file "license.terms" for information on usage and
- * redistribution of this file, and for a DISCLAIMER OF ALL
- * WARRANTIES.
- */
-package edu.cmu.sphinx.alignment.tokenizer;
-
-import java.io.*;
-import java.net.URL;
-import java.util.StringTokenizer;
-import java.util.logging.Logger;
-import java.util.regex.Pattern;
-
-
-/**
- * Implementation of a Classification and Regression Tree (CART) that is used
- * more like a binary decision tree, with each node containing a decision or a
- * final value. The decision nodes in the CART trees operate on an Item and
- * have the following format:
- *
- *
- * NODE feat operand value qfalse
- *
- *
- *
- * Where feat is an string that represents a feature to pass to
- * the findFeature method of an item.
- *
- *
- * The value represents the value to be compared against the
- * feature obtained from the item via the feat string. The
- * operand is the operation to do the comparison. The available
- * operands are as follows:
- *
- *
- *
< - the feature is less than value
- *
=- the feature is equal to the value
- *
>- the feature is greater than the value
- *
MATCHES - the feature matches the regular expression stored in value
- *
IN - [[[TODO: still guessing because none of the CART's in Flite seem to
- * use IN]]] the value is in the list defined by the feature.
- *
- *
- *
- * [[[TODO: provide support for the IN operator.]]]
- *
- *
- * For < and >, this CART coerces the value and feature to float's. For =,
- * this CART coerces the value and feature to string and checks for string
- * equality. For MATCHES, this CART uses the value as a regular expression and
- * compares the obtained feature to that.
- *
- *
- * A CART is represented by an array in this implementation. The
- * qfalse value represents the index of the array to go to if the
- * comparison does not match. In this implementation, qtrue index is always
- * implied, and represents the next element in the array. The root node of the
- * CART is the first element in the array.
- *
- *
- * The interpretations always start at the root node of the CART and continue
- * until a final node is found. The final nodes have the following form:
- *
- *
- * LEAF value
- *
- *
- *
- * Where value represents the value of the node. Reaching a final
- * node indicates the interpretation is over and the value of the node is the
- * interpretation result.
- */
-public class DecisionTree {
- /** Logger instance. */
- private static final Logger logger = Logger.getLogger(DecisionTree.class.getSimpleName());
- /**
- * Entry in file represents the total number of nodes in the file. This
- * should be at the top of the file. The format should be "TOTAL n" where n
- * is an integer value.
- */
- final static String TOTAL = "TOTAL";
-
- /**
- * Entry in file represents a node. The format should be
- * "NODE feat op val f" where 'feat' represents a feature, op represents an
- * operand, val is the value, and f is the index of the node to go to is
- * there isn't a match.
- */
- final static String NODE = "NODE";
-
- /**
- * Entry in file represents a final node. The format should be "LEAF val"
- * where val represents the value.
- */
- final static String LEAF = "LEAF";
-
- /**
- * OPERAND_MATCHES
- */
- final static String OPERAND_MATCHES = "MATCHES";
-
- /**
- * The CART. Entries can be DecisionNode or LeafNode. An ArrayList could be
- * used here -- I chose not to because I thought it might be quicker to
- * avoid dealing with the dynamic resizing.
- */
- Node[] cart = null;
-
- /**
- * The number of nodes in the CART.
- */
- transient int curNode = 0;
-
- /**
- * Creates a new CART by reading from the given URL.
- *
- * @param url the location of the CART data
- *
- * @throws IOException if errors occur while reading the data
- */
- public DecisionTree(URL url) throws IOException {
- BufferedReader reader;
- String line;
-
- reader = new BufferedReader(new InputStreamReader(url.openStream()));
- line = reader.readLine();
- while (line != null) {
- if (!line.startsWith("***")) {
- parseAndAdd(line);
- }
- line = reader.readLine();
- }
- reader.close();
- }
-
- /**
- * Creates a new CART by reading from the given reader.
- *
- * @param reader the source of the CART data
- * @param nodes the number of nodes to read for this cart
- *
- * @throws IOException if errors occur while reading the data
- */
- public DecisionTree(BufferedReader reader, int nodes) throws IOException {
- this(nodes);
- String line;
- for (int i = 0; i < nodes; i++) {
- line = reader.readLine();
- if (!line.startsWith("***")) {
- parseAndAdd(line);
- }
- }
- }
-
- /**
- * Creates a new CART that will be populated with nodes later.
- *
- * @param numNodes the number of nodes
- */
- private DecisionTree(int numNodes) {
- cart = new Node[numNodes];
- }
-
- /**
- * Dump the CART tree as a dot file.
- *
- * The dot tool is part of the graphviz distribution at http://www.graphviz.org/. If
- * installed, call it as "dot -O -Tpdf *.dot" from the console to generate
- * pdfs.
- *
- *
- * @param out The PrintWriter to write to.
- */
- public void dumpDot(PrintWriter out) {
- out.write("digraph \"" + "CART Tree" + "\" {\n");
- out.write("rankdir = LR\n");
-
- for (Node n : cart) {
- out.println("\tnode" + Math.abs(n.hashCode()) + " [ label=\""
- + n.toString() + "\", color=" + dumpDotNodeColor(n)
- + ", shape=" + dumpDotNodeShape(n) + " ]\n");
- if (n instanceof DecisionNode) {
- DecisionNode dn = (DecisionNode) n;
- if (dn.qtrue < cart.length && cart[dn.qtrue] != null) {
- out.write("\tnode" + Math.abs(n.hashCode()) + " -> node"
- + Math.abs(cart[dn.qtrue].hashCode())
- + " [ label=" + "TRUE" + " ]\n");
- }
- if (dn.qfalse < cart.length && cart[dn.qfalse] != null) {
- out.write("\tnode" + Math.abs(n.hashCode()) + " -> node"
- + Math.abs(cart[dn.qfalse].hashCode())
- + " [ label=" + "FALSE" + " ]\n");
- }
- }
- }
-
- out.write("}\n");
- out.close();
- }
-
- protected String dumpDotNodeColor(Node n) {
- if (n instanceof LeafNode) {
- return "green";
- }
- return "red";
- }
-
- protected String dumpDotNodeShape(Node n) {
- return "box";
- }
-
- /**
- * Creates a node from the given input line and add it to the CART. It
- * expects the TOTAL line to come before any of the nodes.
- *
- * @param line a line of input to parse
- */
- protected void parseAndAdd(String line) {
- StringTokenizer tokenizer = new StringTokenizer(line, " ");
- String type = tokenizer.nextToken();
- if (type.equals(LEAF) || type.equals(NODE)) {
- cart[curNode] = getNode(type, tokenizer, curNode);
- cart[curNode].setCreationLine(line);
- curNode++;
- } else if (type.equals(TOTAL)) {
- cart = new Node[Integer.parseInt(tokenizer.nextToken())];
- curNode = 0;
- } else {
- throw new Error("Invalid CART type: " + type);
- }
- }
-
- /**
- * Gets the node based upon the type and tokenizer.
- *
- * @param type NODE or LEAF
- * @param tokenizer the StringTokenizer containing the data to get
- * @param currentNode the index of the current node we're looking at
- *
- * @return the node
- */
- protected Node getNode(String type, StringTokenizer tokenizer,
- int currentNode) {
- if (type.equals(NODE)) {
- String feature = tokenizer.nextToken();
- String operand = tokenizer.nextToken();
- Object value = parseValue(tokenizer.nextToken());
- int qfalse = Integer.parseInt(tokenizer.nextToken());
- if (operand.equals(OPERAND_MATCHES)) {
- return new MatchingNode(feature, value.toString(),
- currentNode + 1, qfalse);
- } else {
- return new ComparisonNode(feature, value, operand,
- currentNode + 1, qfalse);
- }
- } else if (type.equals(LEAF)) {
- return new LeafNode(parseValue(tokenizer.nextToken()));
- }
-
- return null;
- }
-
- /**
- * Coerces a string into a value.
- *
- * @param string of the form "type(value)"; for example, "Float(2.3)"
- *
- * @return the value
- */
- protected Object parseValue(String string) {
- int openParen = string.indexOf("(");
- String type = string.substring(0, openParen);
- String value = string.substring(openParen + 1, string.length() - 1);
- if (type.equals("String")) {
- return value;
- } else if (type.equals("Float")) {
- return new Float(Float.parseFloat(value));
- } else if (type.equals("Integer")) {
- return new Integer(Integer.parseInt(value));
- } else if (type.equals("List")) {
- StringTokenizer tok = new StringTokenizer(value, ",");
- int size = tok.countTokens();
-
- int[] values = new int[size];
- for (int i = 0; i < size; i++) {
- float fval = Float.parseFloat(tok.nextToken());
- values[i] = Math.round(fval);
- }
- return values;
- } else {
- throw new Error("Unknown type: " + type);
- }
- }
-
- /**
- * Passes the given item through this CART and returns the interpretation.
- *
- * @param item the item to analyze
- *
- * @return the interpretation
- */
- public Object interpret(Item item) {
- int nodeIndex = 0;
- DecisionNode decision;
-
- while (!(cart[nodeIndex] instanceof LeafNode)) {
- decision = (DecisionNode) cart[nodeIndex];
- nodeIndex = decision.getNextNode(item);
- }
- logger.fine("LEAF " + cart[nodeIndex].getValue());
- return ((LeafNode) cart[nodeIndex]).getValue();
- }
-
- /**
- * A node for the CART.
- */
- static abstract class Node {
- /**
- * The value of this node.
- */
- protected Object value;
-
- /**
- * Create a new Node with the given value.
- */
- public Node(Object value) {
- this.value = value;
- }
-
- /**
- * Get the value.
- */
- public Object getValue() {
- return value;
- }
-
- /**
- * Return a string representation of the type of the value.
- */
- public String getValueString() {
- if (value == null) {
- return "NULL()";
- } else if (value instanceof String) {
- return "String(" + value.toString() + ")";
- } else if (value instanceof Float) {
- return "Float(" + value.toString() + ")";
- } else if (value instanceof Integer) {
- return "Integer(" + value.toString() + ")";
- } else {
- return value.getClass().toString() + "(" + value.toString()
- + ")";
- }
- }
-
- /**
- * sets the line of text used to create this node.
- *
- * @param line the creation line
- */
- public void setCreationLine(String line) {}
- }
-
- /**
- * A decision node that determines the next Node to go to in the CART.
- */
- abstract static class DecisionNode extends Node {
- /**
- * The feature used to find a value from an Item.
- */
- private PathExtractor path;
-
- /**
- * Index of Node to go to if the comparison doesn't match.
- */
- protected int qfalse;
-
- /**
- * Index of Node to go to if the comparison matches.
- */
- protected int qtrue;
-
- /**
- * The feature used to find a value from an Item.
- */
- public String getFeature() {
- return path.toString();
- }
-
- /**
- * Find the feature associated with this DecisionNode and the given
- * item
- *
- * @param item the item to start from
- * @return the object representing the feature
- */
- public Object findFeature(Item item) {
- return path.findFeature(item);
- }
-
- /**
- * Returns the next node based upon the descision determined at this
- * node
- *
- * @param item the current item.
- * @return the index of the next node
- */
- public final int getNextNode(Item item) {
- return getNextNode(findFeature(item));
- }
-
- /**
- * Create a new DecisionNode.
- *
- * @param feature the string used to get a value from an Item
- * @param value the value to compare to
- * @param qtrue the Node index to go to if the comparison matches
- * @param qfalse the Node machine index to go to upon no match
- */
- public DecisionNode(String feature, Object value, int qtrue, int qfalse) {
- super(value);
- this.path = new PathExtractor(feature, true);
- this.qtrue = qtrue;
- this.qfalse = qfalse;
- }
-
- /**
- * Get the next Node to go to in the CART. The return value is an index
- * in the CART.
- */
- abstract public int getNextNode(Object val);
- }
-
- /**
- * A decision Node that compares two values.
- */
- static class ComparisonNode extends DecisionNode {
- /**
- * LESS_THAN
- */
- final static String LESS_THAN = "<";
-
- /**
- * EQUALS
- */
- final static String EQUALS = "=";
-
- /**
- * GREATER_THAN
- */
- final static String GREATER_THAN = ">";
-
- /**
- * The comparison type. One of LESS_THAN, GREATER_THAN, or EQUAL_TO.
- */
- String comparisonType;
-
- /**
- * Create a new ComparisonNode with the given values.
- *
- * @param feature the string used to get a value from an Item
- * @param value the value to compare to
- * @param comparisonType one of LESS_THAN, EQUAL_TO, or GREATER_THAN
- * @param qtrue the Node index to go to if the comparison matches
- * @param qfalse the Node index to go to upon no match
- */
- public ComparisonNode(String feature, Object value,
- String comparisonType, int qtrue, int qfalse) {
- super(feature, value, qtrue, qfalse);
- if (!comparisonType.equals(LESS_THAN)
- && !comparisonType.equals(EQUALS)
- && !comparisonType.equals(GREATER_THAN)) {
- throw new Error("Invalid comparison type: " + comparisonType);
- } else {
- this.comparisonType = comparisonType;
- }
- }
-
- /**
- * Compare the given value and return the appropriate Node index.
- * IMPLEMENTATION NOTE: LESS_THAN and GREATER_THAN, the Node's value
- * and the value passed in are converted to floating point values. For
- * EQUAL, the Node's value and the value passed in are treated as
- * String compares. This is the way of Flite, so be it Flite.
- *
- * @param val the value to compare
- */
- public int getNextNode(Object val) {
- boolean yes = false;
- int ret;
-
- if (comparisonType.equals(LESS_THAN)
- || comparisonType.equals(GREATER_THAN)) {
- float cart_fval;
- float fval;
- if (value instanceof Float) {
- cart_fval = ((Float) value).floatValue();
- } else {
- cart_fval = Float.parseFloat(value.toString());
- }
- if (val instanceof Float) {
- fval = ((Float) val).floatValue();
- } else {
- fval = Float.parseFloat(val.toString());
- }
- if (comparisonType.equals(LESS_THAN)) {
- yes = (fval < cart_fval);
- } else {
- yes = (fval > cart_fval);
- }
- } else { // comparisonType = "="
- String sval = val.toString();
- String cart_sval = value.toString();
- yes = sval.equals(cart_sval);
- }
- if (yes) {
- ret = qtrue;
- } else {
- ret = qfalse;
- }
- logger.fine(trace(val, yes, ret));
- return ret;
- }
-
- private String trace(Object value, boolean match, int next) {
- return "NODE " + getFeature() + " [" + value + "] "
- + comparisonType + " [" + getValue() + "] "
- + (match ? "Yes" : "No") + " next " + next;
- }
-
- /**
- * Get a string representation of this Node.
- */
- public String toString() {
- return "NODE " + getFeature() + " " + comparisonType + " "
- + getValueString() + " " + Integer.toString(qtrue) + " "
- + Integer.toString(qfalse);
- }
- }
-
- /**
- * A Node that checks for a regular expression match.
- */
- static class MatchingNode extends DecisionNode {
- Pattern pattern;
-
- /**
- * Create a new MatchingNode with the given values.
- *
- * @param feature the string used to get a value from an Item
- * @param regex the regular expression
- * @param qtrue the Node index to go to if the comparison matches
- * @param qfalse the Node index to go to upon no match
- */
- public MatchingNode(String feature, String regex, int qtrue, int qfalse) {
- super(feature, regex, qtrue, qfalse);
- this.pattern = Pattern.compile(regex);
- }
-
- /**
- * Compare the given value and return the appropriate CART index.
- *
- * @param val the value to compare -- this must be a String
- */
- public int getNextNode(Object val) {
- return pattern.matcher((String) val).matches() ? qtrue : qfalse;
- }
-
- /**
- * Get a string representation of this Node.
- */
- public String toString() {
- StringBuffer buf =
- new StringBuffer(NODE + " " + getFeature() + " "
- + OPERAND_MATCHES);
- buf.append(getValueString() + " ");
- buf.append(Integer.toString(qtrue) + " ");
- buf.append(Integer.toString(qfalse));
- return buf.toString();
- }
- }
-
- /**
- * The final Node of a CART. This just a marker class.
- */
- static class LeafNode extends Node {
- /**
- * Create a new LeafNode with the given value.
- *
- * @param the value of this LeafNode
- */
- public LeafNode(Object value) {
- super(value);
- }
-
- /**
- * Get a string representation of this Node.
- */
- public String toString() {
- return "LEAF " + getValueString();
- }
- }
-}
diff --git a/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/tokenizer/FeatureSet.java b/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/tokenizer/FeatureSet.java
deleted file mode 100755
index 959901fe..00000000
--- a/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/tokenizer/FeatureSet.java
+++ /dev/null
@@ -1,145 +0,0 @@
-/**
- * Portions Copyright 2001 Sun Microsystems, Inc.
- * Portions Copyright 1999-2001 Language Technologies Institute,
- * Carnegie Mellon University.
- * All Rights Reserved. Use is subject to license terms.
- *
- * See the file "license.terms" for information on usage and
- * redistribution of this file, and for a DISCLAIMER OF ALL
- * WARRANTIES.
- */
-package edu.cmu.sphinx.alignment.tokenizer;
-
-import java.text.DecimalFormat;
-import java.util.LinkedHashMap;
-import java.util.Map;
-
-/**
- * Implementation of the FeatureSet interface.
- */
-public class FeatureSet {
-
- private final Map featureMap;
- static DecimalFormat formatter;
-
- /**
- * Creates a new empty feature set
- */
- public FeatureSet() {
- featureMap = new LinkedHashMap();
- }
-
- /**
- * Determines if the given feature is present.
- *
- * @param name the name of the feature of interest
- *
- * @return true if the named feature is present
- */
- public boolean isPresent(String name) {
- return featureMap.containsKey(name);
- }
-
- /**
- * Removes the named feature from this set of features.
- *
- * @param name the name of the feature of interest
- */
- public void remove(String name) {
- featureMap.remove(name);
- }
-
- /**
- * Convenience method that returns the named feature as a string.
- *
- * @param name the name of the feature
- *
- * @return the value associated with the name or null if the value is not
- * found
- *
- * @throws ClassCastException if the associated value is not a String
- */
- public String getString(String name) {
- return (String) getObject(name);
- }
-
- /**
- * Convenience method that returns the named feature as a int.
- *
- * @param name the name of the feature
- *
- * @return the value associated with the name or null if the value is not
- * found
- *
- * @throws ClassCastException if the associated value is not an int.
- */
- public int getInt(String name) {
- return ((Integer) getObject(name)).intValue();
- }
-
- /**
- * Convenience method that returns the named feature as a float.
- *
- * @param name the name of the feature
- *
- * @return the value associated with the name or null if the value is not
- * found.
- *
- * @throws ClassCastException if the associated value is not a float
- */
- public float getFloat(String name) {
- return ((Float) getObject(name)).floatValue();
- }
-
- /**
- * Returns the named feature as an object.
- *
- * @param name the name of the feature
- *
- * @return the value associated with the name or null if the value is not
- * found
- */
- public Object getObject(String name) {
- return featureMap.get(name);
- }
-
- /**
- * Convenience method that sets the named feature as a int.
- *
- * @param name the name of the feature
- * @param value the value of the feature
- */
- public void setInt(String name, int value) {
- setObject(name, new Integer(value));
- }
-
- /**
- * Convenience method that sets the named feature as a float.
- *
- * @param name the name of the feature
- * @param value the value of the feature
- */
- public void setFloat(String name, float value) {
- setObject(name, new Float(value));
- }
-
- /**
- * Convenience method that sets the named feature as a String.
- *
- * @param name the name of the feature
- * @param value the value of the feature
- */
- public void setString(String name, String value) {
- setObject(name, value);
- }
-
- /**
- * Sets the named feature.
- *
- * @param name the name of the feature
- * @param value the value of the feature
- */
- public void setObject(String name, Object value) {
- featureMap.put(name, value);
- }
-}
diff --git a/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/tokenizer/Item.java b/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/tokenizer/Item.java
deleted file mode 100755
index 2e69cb3b..00000000
--- a/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/tokenizer/Item.java
+++ /dev/null
@@ -1,447 +0,0 @@
-/**
- * Portions Copyright 2001-2003 Sun Microsystems, Inc.
- * Portions Copyright 1999-2001 Language Technologies Institute,
- * Carnegie Mellon University.
- * All Rights Reserved. Use is subject to license terms.
- *
- * See the file "license.terms" for information on usage and
- * redistribution of this file, and for a DISCLAIMER OF ALL
- * WARRANTIES.
- */
-package edu.cmu.sphinx.alignment.tokenizer;
-
-import java.util.StringTokenizer;
-
-/**
- * Represents a node in a Relation. Items can have shared contents but each
- * item has its own set of Daughters. The shared contents of an item
- * (represented by ItemContents) includes the feature set for the item and the
- * set of all relations that this item is contained in. An item can be
- * contained in a number of relations and as daughters to other items. This
- * class is used to keep track of all of these relationships. There may be many
- * instances of item that reference the same shared ItemContents.
- */
-public class Item {
- private Relation ownerRelation;
- private ItemContents contents;
- private Item parent;
- private Item daughter;
- private Item next;
- private Item prev;
-
- /**
- * Creates an item. The item is coupled to a particular Relation. If shared
- * contents is null a new sharedContents is created.
- *
- * @param relation the relation that owns this item
- * @param sharedContents the contents that is shared with others. If null,
- * a new sharedContents is created.
- */
- public Item(Relation relation, ItemContents sharedContents) {
- ownerRelation = relation;
- if (sharedContents != null) {
- contents = sharedContents;
- } else {
- contents = new ItemContents();
- }
- parent = null;
- daughter = null;
- next = null;
- prev = null;
-
- getSharedContents().addItemRelation(relation.getName(), this);
- }
-
- /**
- * Finds the item in the given relation that has the same shared contents.
- *
- * @param relationName the relation of interest
- *
- * @return the item as found in the given relation or null if not found
- */
- public Item getItemAs(String relationName) {
- return getSharedContents().getItemRelation(relationName);
- }
-
- /**
- * Retrieves the owning Relation.
- *
- * @return the relation that owns this item
- */
- public Relation getOwnerRelation() {
- return ownerRelation;
- }
-
- /**
- * Retrieves the shared contents for this item.
- *
- * @return the shared item contents
- */
- public ItemContents getSharedContents() {
- return contents;
- }
-
- /**
- * Determines if this item has daughters.
- *
- * @return true if this item has daughters
- */
- public boolean hasDaughters() {
- return daughter != null;
- }
-
- /**
- * Retrieves the first daughter of this item.
- *
- * @return the first daughter or null if none
- */
- public Item getDaughter() {
- return daughter;
- }
-
- /**
- * Retrieves the Nth daughter of this item.
- *
- * @param which the index of the daughter to return
- *
- * @return the Nth daughter or null if none at the given index
- */
- public Item getNthDaughter(int which) {
- Item d = daughter;
- int count = 0;
- while (count++ != which && d != null) {
- d = d.next;
- }
- return d;
- }
-
- /**
- * Retrieves the last daughter of this item.
- *
- * @return the last daughter or null if none at the given index
- */
- public Item getLastDaughter() {
- Item d = daughter;
- if (d == null) {
- return null;
- }
- while (d.next != null) {
- d = d.next;
- }
- return d;
- }
-
- /**
- * Adds the given item as a daughter to this item.
- *
- * @param item for the new daughter
- * @return created item
- */
- public Item addDaughter(Item item) {
- Item newItem;
- ItemContents contents;
-
- Item p = getLastDaughter();
-
- if (p != null) {
- newItem = p.appendItem(item);
- } else {
- if (item == null) {
- contents = new ItemContents();
- } else {
- contents = item.getSharedContents();
- }
- newItem = new Item(getOwnerRelation(), contents);
- newItem.parent = this;
- daughter = newItem;
- }
- return newItem;
- }
-
- /**
- * Creates a new Item, adds it as a daughter to this item and returns the
- * new item.
- *
- * @return the newly created item that was added as a daughter
- */
- public Item createDaughter() {
- return addDaughter(null);
- }
-
- /**
- * Returns the parent of this item.
- *
- * @return the parent of this item
- */
- public Item getParent() {
- Item n;
- for (n = this; n.prev != null; n = n.prev) {
- }
- return n.parent;
- }
-
- /**
- * Sets the parent of this item.
- *
- * @param parent the parent of this item
- */
- /*
- * private void setParent(Item parent) { this.parent = parent; }
- */
-
- /**
- * Returns the utterance associated with this item.
- *
- * @return the utterance that contains this item
- */
- public Utterance getUtterance() {
- return getOwnerRelation().getUtterance();
- }
-
- /**
- * Returns the feature set of this item.
- *
- * @return the feature set of this item
- */
- public FeatureSet getFeatures() {
- return getSharedContents().getFeatures();
- }
-
- /**
- * Finds the feature by following the given path. Path is a string of ":"
- * or "." separated strings with the following interpretations:
- *
- *
n - next item
- *
p - previous item
- *
parent - the parent
- *
daughter - the daughter
- *
daughter1 - same as daughter
- *
daughtern - the last daughter
- *
R:relname - the item as found in the given relation 'relname'
- *
- * The last element of the path will be interpreted as a voice/language
- * specific feature function (if present) or an item feature name. If the
- * feature function exists it will be called with the item specified by the
- * path, otherwise, a feature will be retrieved with the given name. If
- * neither exist than a String "0" is returned.
- *
- * @param pathAndFeature the path to follow
- * @return created object
- */
- public Object findFeature(String pathAndFeature) {
- int lastDot;
- String feature;
- String path;
- Item item;
- Object results = null;
-
- lastDot = pathAndFeature.lastIndexOf(".");
- // string can be of the form "p.feature" or just "feature"
-
- if (lastDot == -1) {
- feature = pathAndFeature;
- path = null;
- } else {
- feature = pathAndFeature.substring(lastDot + 1);
- path = pathAndFeature.substring(0, lastDot);
- }
-
- item = findItem(path);
- if (item != null) {
- results = item.getFeatures().getObject(feature);
- }
- results = (results == null) ? "0" : results;
-
- // System.out.println("FI " + pathAndFeature + " are " + results);
-
- return results;
- }
-
- /**
- * Finds the item specified by the given path.
- *
- * Path is a string of ":" or "." separated strings with the following
- * interpretations:
- *
- *
n - next item
- *
p - previous item
- *
parent - the parent
- *
daughter - the daughter
- *
daughter1 - same as daughter
- *
daughtern - the last daughter
- *
R:relname - the item as found in the given relation 'relname'
- *
- * If the given path takes us outside of the bounds of the item graph, then
- * list access exceptions will be thrown.
- *
- * @param path the path to follow
- *
- * @return the item at the given path
- */
- public Item findItem(String path) {
- Item pitem = this;
- StringTokenizer tok;
-
- if (path == null) {
- return this;
- }
-
- tok = new StringTokenizer(path, ":.");
-
- while (pitem != null && tok.hasMoreTokens()) {
- String token = tok.nextToken();
- if (token.equals("n")) {
- pitem = pitem.getNext();
- } else if (token.equals("p")) {
- pitem = pitem.getPrevious();
- } else if (token.equals("nn")) {
- pitem = pitem.getNext();
- if (pitem != null) {
- pitem = pitem.getNext();
- }
- } else if (token.equals("pp")) {
- pitem = pitem.getPrevious();
- if (pitem != null) {
- pitem = pitem.getPrevious();
- }
- } else if (token.equals("parent")) {
- pitem = pitem.getParent();
- } else if (token.equals("daughter") || token.equals("daughter1")) {
- pitem = pitem.getDaughter();
- } else if (token.equals("daughtern")) {
- pitem = pitem.getLastDaughter();
- } else if (token.equals("R")) {
- String relationName = tok.nextToken();
- pitem =
- pitem.getSharedContents()
- .getItemRelation(relationName);
- } else {
- System.out.println("findItem: bad feature " + token + " in "
- + path);
- }
- }
- return pitem;
- }
-
- /**
- * Gets the next item in this list.
- *
- * @return the next item or null
- */
- public Item getNext() {
- return next;
- }
-
- /**
- * Gets the previous item in this list.
- *
- * @return the previous item or null
- */
- public Item getPrevious() {
- return prev;
- }
-
- /**
- * Appends an item in this list after this item.
- *
- * @param originalItem new item has shared contents with this item (or *
- * null)
- *
- * @return the newly appended item
- */
- public Item appendItem(Item originalItem) {
- ItemContents contents;
- Item newItem;
-
- if (originalItem == null) {
- contents = null;
- } else {
- contents = originalItem.getSharedContents();
- }
-
- newItem = new Item(getOwnerRelation(), contents);
- newItem.next = this.next;
- if (this.next != null) {
- this.next.prev = newItem;
- }
-
- attach(newItem);
-
- if (this.ownerRelation.getTail() == this) {
- this.ownerRelation.setTail(newItem);
- }
- return newItem;
- }
-
- /**
- * Attaches/appends an item to this one.
- *
- * @param item the item to append
- */
- void attach(Item item) {
- this.next = item;
- item.prev = this;
- }
-
- /**
- * Prepends an item in this list before this item.
- *
- * @param originalItem new item has shared contents with this item (or *
- * null)
- *
- * @return the newly appended item
- */
- public Item prependItem(Item originalItem) {
- ItemContents contents;
- Item newItem;
-
- if (originalItem == null) {
- contents = null;
- } else {
- contents = originalItem.getSharedContents();
- }
-
- newItem = new Item(getOwnerRelation(), contents);
- newItem.prev = this.prev;
- if (this.prev != null) {
- this.prev.next = newItem;
- }
- newItem.next = this;
- this.prev = newItem;
- if (this.parent != null) {
- this.parent.daughter = newItem;
- newItem.parent = this.parent;
- this.parent = null;
- }
- if (this.ownerRelation.getHead() == this) {
- this.ownerRelation.setHead(newItem);
- }
- return newItem;
- }
-
- // Inherited from object
- public String toString() {
- // if we have a feature called 'name' use that
- // otherwise fall back on the default.
- String name = getFeatures().getString("name");
- if (name == null) {
- name = "";
- }
- return name;
- }
-
- /**
- * Determines if the shared contents of the two items are the same.
- *
- * @param otherItem the item to compare
- *
- * @return true if the shared contents are the same
- */
- public boolean equalsShared(Item otherItem) {
- if (otherItem == null) {
- return false;
- } else {
- return getSharedContents().equals(otherItem.getSharedContents());
- }
- }
-}
diff --git a/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/tokenizer/ItemContents.java b/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/tokenizer/ItemContents.java
deleted file mode 100755
index 75213175..00000000
--- a/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/tokenizer/ItemContents.java
+++ /dev/null
@@ -1,74 +0,0 @@
-/**
- * Portions Copyright 2001 Sun Microsystems, Inc.
- * Portions Copyright 1999-2001 Language Technologies Institute,
- * Carnegie Mellon University.
- * All Rights Reserved. Use is subject to license terms.
- *
- * See the file "license.terms" for information on usage and
- * redistribution of this file, and for a DISCLAIMER OF ALL
- * WARRANTIES.
- */
-package edu.cmu.sphinx.alignment.tokenizer;
-
-
-/**
- * Contains the information that is shared between multiple items.
- */
-public class ItemContents {
- private FeatureSet features;
- private FeatureSet relations;
-
- /**
- * Class Constructor.
- */
- public ItemContents() {
- features = new FeatureSet();
- relations = new FeatureSet();
- }
-
- /**
- * Adds the given item to the set of relations. Whenever an item is added
- * to a relation, it should add the name and the Item reference to this set
- * of name/item mappings. This allows an item to find out the set of all
- * relations that it is contained in.
- *
- * @param relationName the name of the relation
- * @param item the item reference in the relation
- */
- public void addItemRelation(String relationName, Item item) {
- // System.out.println("AddItemRelation: " + relationName
- // + " item: " + item);
- relations.setObject(relationName, item);
- }
-
- /**
- * Removes the relation/item mapping from this ItemContents.
- *
- * @param relationName the name of the relation/item to remove
- */
- public void removeItemRelation(String relationName) {
- relations.remove(relationName);
- }
-
- /**
- * Given the name of a relation, returns the item the shares the same
- * ItemContents.
- *
- * @param relationName the name of the relation of interest
- *
- * @return the item associated with this ItemContents in the named
- * relation, or null if it does not exist
- */
- public Item getItemRelation(String relationName) {
- return (Item) relations.getObject(relationName);
- }
-
- /**
- * Returns the feature set for this item contents.
- *
- * @return the FeatureSet for this contents
- */
- public FeatureSet getFeatures() {
- return features;
- }
-}
diff --git a/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/tokenizer/NumberExpander.java b/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/tokenizer/NumberExpander.java
deleted file mode 100755
index 4d81abfa..00000000
--- a/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/tokenizer/NumberExpander.java
+++ /dev/null
@@ -1,449 +0,0 @@
-/**
- * Portions Copyright 2001-2003 Sun Microsystems, Inc.
- * Portions Copyright 1999-2001 Language Technologies Institute,
- * Carnegie Mellon University.
- * All Rights Reserved. Use is subject to license terms.
- *
- * See the file "license.terms" for information on usage and
- * redistribution of this file, and for a DISCLAIMER OF ALL
- * WARRANTIES.
- */
-package edu.cmu.sphinx.alignment.tokenizer;
-
-
-/**
- * Expands Strings containing digits characters into a list of words
- * representing those digits.
- *
- * It translates the following code from flite:
- * lang/usEnglish/us_expand.c
- */
-public class NumberExpander {
-
- private static final String[] digit2num = {"zero", "one", "two", "three",
- "four", "five", "six", "seven", "eight", "nine"};
-
- private static final String[] digit2teen = {"ten", /* shouldn't get called */
- "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen",
- "seventeen", "eighteen", "nineteen"};
-
- private static final String[] digit2enty = {"zero", /* shouldn't get called */
- "ten", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty",
- "ninety"};
-
- private static final String[] ord2num = {"zeroth", "first", "second",
- "third", "fourth", "fifth", "sixth", "seventh", "eighth", "ninth"};
-
- private static final String[] ord2teen = {"tenth", /* shouldn't get called */
- "eleventh", "twelfth", "thirteenth", "fourteenth", "fifteenth",
- "sixteenth", "seventeenth", "eighteenth", "nineteenth"};
-
- private static final String[] ord2enty = {"zeroth", /* shouldn't get called */
- "tenth", "twentieth", "thirtieth", "fortieth", "fiftieth", "sixtieth",
- "seventieth", "eightieth", "ninetieth"};
-
- private static String[] digit2Numness = {
- "", "tens", "twenties", "thirties", "fourties", "fifties",
- "sixties", "seventies", "eighties", "nineties"
- };
-
- /**
- * Unconstructable
- */
- private NumberExpander() {}
-
- /**
- * Expands a digit string into a list of English words of those digits. For
- * example, "1234" expands to "one two three four"
- *
- * @param numberString the digit string to expand.
- * @param wordRelation words are added to this Relation
- */
- public static void expandNumber(String numberString,
- WordRelation wordRelation) {
- int numDigits = numberString.length();
-
- if (numDigits == 0) {
- // wordRelation = null;
- } else if (numDigits == 1) {
- expandDigits(numberString, wordRelation);
- } else if (numDigits == 2) {
- expand2DigitNumber(numberString, wordRelation);
- } else if (numDigits == 3) {
- expand3DigitNumber(numberString, wordRelation);
- } else if (numDigits < 7) {
- expandBelow7DigitNumber(numberString, wordRelation);
- } else if (numDigits < 10) {
- expandBelow10DigitNumber(numberString, wordRelation);
- } else if (numDigits < 13) {
- expandBelow13DigitNumber(numberString, wordRelation);
- } else {
- expandDigits(numberString, wordRelation);
- }
- }
-
- /**
- * Expands a two-digit string into a list of English words.
- *
- * @param numberString the string which is the number to expand
- * @param wordRelation words are added to this Relation
- */
- private static void expand2DigitNumber(String numberString,
- WordRelation wordRelation) {
- if (numberString.charAt(0) == '0') {
- // numberString is "0X"
- if (numberString.charAt(1) == '0') {
- // numberString is "00", do nothing
- } else {
- // numberString is "01", "02" ...
- String number = digit2num[numberString.charAt(1) - '0'];
- wordRelation.addWord(number);
- }
- } else if (numberString.charAt(1) == '0') {
- // numberString is "10", "20", ...
- String number = digit2enty[numberString.charAt(0) - '0'];
- wordRelation.addWord(number);
- } else if (numberString.charAt(0) == '1') {
- // numberString is "11", "12", ..., "19"
- String number = digit2teen[numberString.charAt(1) - '0'];
- wordRelation.addWord(number);
- } else {
- // numberString is "2X", "3X", ...
- String enty = digit2enty[numberString.charAt(0) - '0'];
- wordRelation.addWord(enty);
- expandDigits(numberString.substring(1, numberString.length()),
- wordRelation);
- }
- }
-
- /**
- * Expands a three-digit string into a list of English words.
- *
- * @param numberString the string which is the number to expand
- * @param wordRelation words are added to this Relation
- */
- private static void expand3DigitNumber(String numberString,
- WordRelation wordRelation) {
- if (numberString.charAt(0) == '0') {
- expandNumberAt(numberString, 1, wordRelation);
- } else {
- String hundredDigit = digit2num[numberString.charAt(0) - '0'];
- wordRelation.addWord(hundredDigit);
- wordRelation.addWord("hundred");
- expandNumberAt(numberString, 1, wordRelation);
- }
- }
-
- /**
- * Expands a string that is a 4 to 6 digits number into a list of English
- * words. For example, "333000" into "three hundred and thirty-three
- * thousand".
- *
- * @param numberString the string which is the number to expand
- * @param wordRelation words are added to this Relation
- */
- private static void expandBelow7DigitNumber(String numberString,
- WordRelation wordRelation) {
- expandLargeNumber(numberString, "thousand", 3, wordRelation);
- }
-
- /**
- * Expands a string that is a 7 to 9 digits number into a list of English
- * words. For example, "19000000" into nineteen million.
- *
- * @param numberString the string which is the number to expand
- * @param wordRelation words are added to this Relation
- */
- private static void expandBelow10DigitNumber(String numberString,
- WordRelation wordRelation) {
- expandLargeNumber(numberString, "million", 6, wordRelation);
- }
-
- /**
- * Expands a string that is a 10 to 12 digits number into a list of English
- * words. For example, "27000000000" into twenty-seven billion.
- *
- * @param numberString the string which is the number to expand
- * @param wordRelation words are added to this Relation
- */
- private static void expandBelow13DigitNumber(String numberString,
- WordRelation wordRelation) {
- expandLargeNumber(numberString, "billion", 9, wordRelation);
- }
-
- /**
- * Expands a string that is a number longer than 3 digits into a list of
- * English words. For example, "1000" into one thousand.
- *
- * @param numberString the string which is the number to expand
- * @param order either "thousand", "million", or "billion"
- * @param numberZeroes the number of zeroes, depending on the order, so its
- * either 3, 6, or 9
- * @param wordRelation words are added to this Relation
- */
- private static void expandLargeNumber(String numberString, String order,
- int numberZeroes, WordRelation wordRelation) {
- int numberDigits = numberString.length();
-
- // parse out the prefix, e.g., "113" in "113,000"
- int i = numberDigits - numberZeroes;
- String part = numberString.substring(0, i);
-
- // get how many thousands/millions/billions
- Item oldTail = wordRelation.getTail();
- expandNumber(part, wordRelation);
- if (wordRelation.getTail() != oldTail) {
- wordRelation.addWord(order);
- }
- expandNumberAt(numberString, i, wordRelation);
- }
-
- /**
- * Returns the number string list of the given string starting at the given
- * index. E.g., expandNumberAt("1100", 1) gives "one hundred"
- *
- * @param numberString the string which is the number to expand
- * @param startIndex the starting position
- * @param wordRelation words are added to this Relation
- */
- private static void expandNumberAt(String numberString, int startIndex,
- WordRelation wordRelation) {
- expandNumber(
- numberString.substring(startIndex, numberString.length()),
- wordRelation);
- }
-
- /**
- * Expands given token to list of words pronouncing it as digits
- *
- * @param numberString the string which is the number to expand
- * @param wordRelation words are added to this Relation
- */
- public static void expandDigits(String numberString,
- WordRelation wordRelation) {
- int numberDigits = numberString.length();
- for (int i = 0; i < numberDigits; i++) {
- char digit = numberString.charAt(i);
- if (Character.isDigit(digit)) {
- wordRelation.addWord(digit2num[numberString.charAt(i) - '0']);
- } else {
- wordRelation.addWord("umpty");
- }
- }
- }
-
- /**
- * Expands the digit string of an ordinal number.
- *
- * @param rawNumberString the string which is the number to expand
- * @param wordRelation words are added to this Relation
- */
- public static void expandOrdinal(String rawNumberString,
- WordRelation wordRelation) {
- // remove all ','s from the raw number string
- expandNumber(rawNumberString.replace(",", ""), wordRelation);
-
- // get the last in the list of number strings
- Item lastItem = wordRelation.getTail();
-
- if (lastItem != null) {
-
- FeatureSet featureSet = lastItem.getFeatures();
- String lastNumber = featureSet.getString("name");
- String ordinal = findMatchInArray(lastNumber, digit2num, ord2num);
-
- if (ordinal == null) {
- ordinal = findMatchInArray(lastNumber, digit2teen, ord2teen);
- }
- if (ordinal == null) {
- ordinal = findMatchInArray(lastNumber, digit2enty, ord2enty);
- }
-
- if (lastNumber.equals("hundred")) {
- ordinal = "hundredth";
- } else if (lastNumber.equals("thousand")) {
- ordinal = "thousandth";
- } else if (lastNumber.equals("billion")) {
- ordinal = "billionth";
- }
-
- // if there was an ordinal, set the last element of the list
- // to that ordinal; otherwise, don't do anything
- if (ordinal != null) {
- wordRelation.setLastWord(ordinal);
- }
- }
- }
-
-
- public static void expandNumess(String rawString, WordRelation wordRelation) {
- if (rawString.length() == 4) {
- expand2DigitNumber(rawString.substring(0, 2), wordRelation);
- expandNumess(rawString.substring(2), wordRelation);
- } else {
- wordRelation.addWord(digit2Numness[rawString.charAt(0) - '0']);
- }
- }
-
- /**
- * Finds a match of the given string in the given array, and returns the
- * element at the same index in the returnInArray
- *
- * @param strToMatch the string to match
- * @param matchInArray the source array
- * @param returnInArray the return array
- *
- * @return an element in returnInArray, or null if a match is
- * not found
- */
- private static String findMatchInArray(String strToMatch,
- String[] matchInArray, String[] returnInArray) {
- for (int i = 0; i < matchInArray.length; i++) {
- if (strToMatch.equals(matchInArray[i])) {
- if (i < returnInArray.length) {
- return returnInArray[i];
- } else {
- return null;
- }
- }
- }
- return null;
- }
-
- /**
- * Expands the given number string as pairs as in years or IDs
- *
- * @param numberString the string which is the number to expand
- * @param wordRelation words are added to this Relation
- */
- public static void expandID(String numberString, WordRelation wordRelation) {
-
- int numberDigits = numberString.length();
-
- if ((numberDigits == 4) && (numberString.charAt(2) == '0')
- && (numberString.charAt(3) == '0')) {
- if (numberString.charAt(1) == '0') { // e.g. 2000, 3000
- expandNumber(numberString, wordRelation);
- } else {
- expandNumber(numberString.substring(0, 2), wordRelation);
- wordRelation.addWord("hundred");
- }
- } else if ((numberDigits == 2) && (numberString.charAt(0) == '0')) {
- wordRelation.addWord("oh");
- expandDigits(numberString.substring(1, 2), wordRelation);
- } else if ((numberDigits == 4 && numberString.charAt(1) == '0')
- || numberDigits < 3) {
- expandNumber(numberString, wordRelation);
- } else if (numberDigits % 2 == 1) {
- String firstDigit = digit2num[numberString.charAt(0) - '0'];
- wordRelation.addWord(firstDigit);
- expandID(numberString.substring(1, numberDigits), wordRelation);
- } else {
- expandNumber(numberString.substring(0, 2), wordRelation);
- expandID(numberString.substring(2, numberDigits), wordRelation);
- }
- }
-
- /**
- * Expands the given number string as a real number.
- *
- * @param numberString the string which is the real number to expand
- * @param wordRelation words are added to this Relation
- */
- public static void expandReal(String numberString,
- WordRelation wordRelation) {
-
- int stringLength = numberString.length();
- int position;
-
- if (numberString.charAt(0) == '-') {
- // negative real numbers
- wordRelation.addWord("minus");
- expandReal(numberString.substring(1, stringLength), wordRelation);
- } else if (numberString.charAt(0) == '+') {
- // prefixed with a '+'
- wordRelation.addWord("plus");
- expandReal(numberString.substring(1, stringLength), wordRelation);
- } else if ((position = numberString.indexOf('e')) != -1
- || (position = numberString.indexOf('E')) != -1) {
- // numbers with 'E' or 'e'
- expandReal(numberString.substring(0, position), wordRelation);
- wordRelation.addWord("e");
- expandReal(numberString.substring(position + 1), wordRelation);
- } else if ((position = numberString.indexOf('.')) != -1) {
- // numbers with '.'
- String beforeDot = numberString.substring(0, position);
- if (beforeDot.length() > 0) {
- expandReal(beforeDot, wordRelation);
- }
- wordRelation.addWord("point");
- String afterDot = numberString.substring(position + 1);
- if (afterDot.length() > 0) {
- expandDigits(afterDot, wordRelation);
- }
- } else {
- // everything else
- expandNumber(numberString, wordRelation);
- }
- }
-
- /**
- * Expands the given string of letters as a list of single char symbols.
- *
- * @param letters the string of letters to expand
- * @param wordRelation words are added to this Relation
- */
- public static void expandLetters(String letters, WordRelation wordRelation) {
- letters = letters.toLowerCase();
- char c;
-
- for (int i = 0; i < letters.length(); i++) {
- // if this is a number
- c = letters.charAt(i);
- if (Character.isDigit(c)) {
- wordRelation.addWord(digit2num[c - '0']);
- } else if (letters.equals("a")) {
- wordRelation.addWord("_a");
- } else {
- wordRelation.addWord(String.valueOf(c));
- }
- }
- }
-
- /**
- * Returns the integer value of the given string of Roman numerals.
- *
- * @param roman the string of Roman numbers
- *
- * @return the integer value
- */
- public static int expandRoman(String roman) {
- int value = 0;
-
- for (int p = 0; p < roman.length(); p++) {
- char c = roman.charAt(p);
- if (c == 'X') {
- value += 10;
- } else if (c == 'V') {
- value += 5;
- } else if (c == 'I') {
- if (p + 1 < roman.length()) {
- char p1 = roman.charAt(p + 1);
- if (p1 == 'V') {
- value += 4;
- p++;
- } else if (p1 == 'X') {
- value += 9;
- p++;
- } else {
- value += 1;
- }
- } else {
- value += 1;
- }
- }
- }
- return value;
- }
-
-}
diff --git a/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/tokenizer/PathExtractor.java b/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/tokenizer/PathExtractor.java
deleted file mode 100755
index 9ee7a42e..00000000
--- a/lib/sphinx4-5prealpha-src/sphinx4-core/src/main/java/edu/cmu/sphinx/alignment/tokenizer/PathExtractor.java
+++ /dev/null
@@ -1,264 +0,0 @@
-/**
- * Portions Copyright 2001 Sun Microsystems, Inc.
- * Portions Copyright 1999-2001 Language Technologies Institute,
- * Carnegie Mellon University.
- * All Rights Reserved. Use is subject to license terms.
- *
- * See the file "license.terms" for information on usage and
- * redistribution of this file, and for a DISCLAIMER OF ALL
- * WARRANTIES.
- */
-package edu.cmu.sphinx.alignment.tokenizer;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.StringTokenizer;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-/**
- * Interface that Manages a feature or item path. Allows navigation to the
- * corresponding feature or item. This class in controlled by the following
- * system properties:
- *
- *